diff options
author | Pierre Schmitz <pierre@archlinux.de> | 2006-10-11 18:12:39 +0000 |
---|---|---|
committer | Pierre Schmitz <pierre@archlinux.de> | 2006-10-11 18:12:39 +0000 |
commit | 183851b06bd6c52f3cae5375f433da720d410447 (patch) | |
tree | a477257decbf3360127f6739c2f9d0ec57a03d39 /maintenance |
MediaWiki 1.7.1 wiederhergestellt
Diffstat (limited to 'maintenance')
232 files changed, 30129 insertions, 0 deletions
diff --git a/maintenance/.htaccess b/maintenance/.htaccess new file mode 100644 index 00000000..3a428827 --- /dev/null +++ b/maintenance/.htaccess @@ -0,0 +1 @@ +Deny from all diff --git a/maintenance/Doxyfile b/maintenance/Doxyfile new file mode 100644 index 00000000..06f2c096 --- /dev/null +++ b/maintenance/Doxyfile @@ -0,0 +1,279 @@ +# Doxyfile 1.4.6 + +# +# Some placeholders have been added for MediaWiki usage: +# {{OUTPUT_DIRECTORY}} +# {{STRIP_FROM_PATH}} +# {{INPUT}} + +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- +PROJECT_NAME = MediaWiki +PROJECT_NUMBER = trunk +OUTPUT_DIRECTORY = {{OUTPUT_DIRECTORY}} +CREATE_SUBDIRS = NO +OUTPUT_LANGUAGE = English +USE_WINDOWS_ENCODING = NO +BRIEF_MEMBER_DESC = YES +REPEAT_BRIEF = YES +ABBREVIATE_BRIEF = "The $name class" \ + "The $name widget" \ + "The $name file" \ + is \ + provides \ + specifies \ + contains \ + represents \ + a \ + an \ + the +ALWAYS_DETAILED_SEC = NO +INLINE_INHERITED_MEMB = NO +FULL_PATH_NAMES = YES +STRIP_FROM_PATH = {{STRIP_FROM_PATH}} +STRIP_FROM_INC_PATH = +SHORT_NAMES = NO +JAVADOC_AUTOBRIEF = NO +MULTILINE_CPP_IS_BRIEF = NO +DETAILS_AT_TOP = NO +INHERIT_DOCS = YES +SEPARATE_MEMBER_PAGES = NO +TAB_SIZE = 8 +ALIASES = +OPTIMIZE_OUTPUT_FOR_C = NO +OPTIMIZE_OUTPUT_JAVA = NO +BUILTIN_STL_SUPPORT = NO +DISTRIBUTE_GROUP_DOC = NO +SUBGROUPING = YES +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- +EXTRACT_ALL = YES +EXTRACT_PRIVATE = YES +EXTRACT_STATIC = YES +EXTRACT_LOCAL_CLASSES = YES +EXTRACT_LOCAL_METHODS = NO +HIDE_UNDOC_MEMBERS = NO +HIDE_UNDOC_CLASSES = NO +HIDE_FRIEND_COMPOUNDS = NO +HIDE_IN_BODY_DOCS = NO +INTERNAL_DOCS = NO +CASE_SENSE_NAMES = YES +HIDE_SCOPE_NAMES = NO +SHOW_INCLUDE_FILES = YES 
+INLINE_INFO = YES +SORT_MEMBER_DOCS = YES +SORT_BRIEF_DOCS = NO +SORT_BY_SCOPE_NAME = NO +GENERATE_TODOLIST = YES +GENERATE_TESTLIST = YES +GENERATE_BUGLIST = YES +GENERATE_DEPRECATEDLIST= YES +ENABLED_SECTIONS = +MAX_INITIALIZER_LINES = 30 +SHOW_USED_FILES = YES +SHOW_DIRECTORIES = NO +FILE_VERSION_FILTER = +#--------------------------------------------------------------------------- +# configuration options related to warning and progress messages +#--------------------------------------------------------------------------- +QUIET = NO +WARNINGS = YES +WARN_IF_UNDOCUMENTED = YES +WARN_IF_DOC_ERROR = YES +WARN_NO_PARAMDOC = NO +WARN_FORMAT = "$file:$line: $text" +WARN_LOGFILE = +#--------------------------------------------------------------------------- +# configuration options related to the input files +#--------------------------------------------------------------------------- +INPUT = {{INPUT}} +FILE_PATTERNS = *.c \ + *.cc \ + *.cxx \ + *.cpp \ + *.c++ \ + *.d \ + *.java \ + *.ii \ + *.ixx \ + *.ipp \ + *.i++ \ + *.inl \ + *.h \ + *.hh \ + *.hxx \ + *.hpp \ + *.h++ \ + *.idl \ + *.odl \ + *.cs \ + *.php \ + *.php3 \ + *.inc \ + *.m \ + *.mm \ + *.dox \ + *.py \ + *.C \ + *.CC \ + *.C++ \ + *.II \ + *.I++ \ + *.H \ + *.HH \ + *.H++ \ + *.CS \ + *.PHP \ + *.PHP3 \ + *.M \ + *.MM \ + *.PY +RECURSIVE = YES +EXCLUDE = +EXCLUDE_SYMLINKS = NO +EXCLUDE_PATTERNS = +EXAMPLE_PATH = +EXAMPLE_PATTERNS = * +EXAMPLE_RECURSIVE = NO +IMAGE_PATH = +INPUT_FILTER = +FILTER_PATTERNS = +FILTER_SOURCE_FILES = NO +#--------------------------------------------------------------------------- +# configuration options related to source browsing +#--------------------------------------------------------------------------- +SOURCE_BROWSER = YES +INLINE_SOURCES = NO +STRIP_CODE_COMMENTS = YES +REFERENCED_BY_RELATION = YES +REFERENCES_RELATION = YES +USE_HTAGS = NO +VERBATIM_HEADERS = YES +#--------------------------------------------------------------------------- +# configuration 
options related to the alphabetical class index +#--------------------------------------------------------------------------- +ALPHABETICAL_INDEX = NO +COLS_IN_ALPHA_INDEX = 5 +IGNORE_PREFIX = +#--------------------------------------------------------------------------- +# configuration options related to the HTML output +#--------------------------------------------------------------------------- +GENERATE_HTML = YES +HTML_OUTPUT = html +HTML_FILE_EXTENSION = .html +HTML_HEADER = +HTML_FOOTER = +HTML_STYLESHEET = +HTML_ALIGN_MEMBERS = YES +GENERATE_HTMLHELP = NO +CHM_FILE = +HHC_LOCATION = +GENERATE_CHI = NO +BINARY_TOC = NO +TOC_EXPAND = NO +DISABLE_INDEX = NO +ENUM_VALUES_PER_LINE = 4 +GENERATE_TREEVIEW = YES +TREEVIEW_WIDTH = 250 +#--------------------------------------------------------------------------- +# configuration options related to the LaTeX output +#--------------------------------------------------------------------------- +GENERATE_LATEX = NO +LATEX_OUTPUT = latex +LATEX_CMD_NAME = latex +MAKEINDEX_CMD_NAME = makeindex +COMPACT_LATEX = NO +PAPER_TYPE = a4wide +EXTRA_PACKAGES = +LATEX_HEADER = +PDF_HYPERLINKS = NO +USE_PDFLATEX = NO +LATEX_BATCHMODE = NO +LATEX_HIDE_INDICES = NO +#--------------------------------------------------------------------------- +# configuration options related to the RTF output +#--------------------------------------------------------------------------- +GENERATE_RTF = NO +RTF_OUTPUT = rtf +COMPACT_RTF = NO +RTF_HYPERLINKS = NO +RTF_STYLESHEET_FILE = +RTF_EXTENSIONS_FILE = +#--------------------------------------------------------------------------- +# configuration options related to the man page output +#--------------------------------------------------------------------------- +GENERATE_MAN = NO +MAN_OUTPUT = man +MAN_EXTENSION = .3 +MAN_LINKS = NO +#--------------------------------------------------------------------------- +# configuration options related to the XML output 
+#--------------------------------------------------------------------------- +GENERATE_XML = NO +XML_OUTPUT = xml +XML_SCHEMA = +XML_DTD = +XML_PROGRAMLISTING = YES +#--------------------------------------------------------------------------- +# configuration options for the AutoGen Definitions output +#--------------------------------------------------------------------------- +GENERATE_AUTOGEN_DEF = NO +#--------------------------------------------------------------------------- +# configuration options related to the Perl module output +#--------------------------------------------------------------------------- +GENERATE_PERLMOD = NO +PERLMOD_LATEX = NO +PERLMOD_PRETTY = YES +PERLMOD_MAKEVAR_PREFIX = +#--------------------------------------------------------------------------- +# Configuration options related to the preprocessor +#--------------------------------------------------------------------------- +ENABLE_PREPROCESSING = YES +MACRO_EXPANSION = NO +EXPAND_ONLY_PREDEF = NO +SEARCH_INCLUDES = YES +INCLUDE_PATH = +INCLUDE_FILE_PATTERNS = +PREDEFINED = +EXPAND_AS_DEFINED = +SKIP_FUNCTION_MACROS = YES +#--------------------------------------------------------------------------- +# Configuration::additions related to external references +#--------------------------------------------------------------------------- +TAGFILES = +GENERATE_TAGFILE = +ALLEXTERNALS = NO +EXTERNAL_GROUPS = YES +PERL_PATH = /usr/bin/perl +#--------------------------------------------------------------------------- +# Configuration options related to the dot tool +#--------------------------------------------------------------------------- +CLASS_DIAGRAMS = NO +HIDE_UNDOC_RELATIONS = YES +HAVE_DOT = NO +CLASS_GRAPH = YES +COLLABORATION_GRAPH = YES +GROUP_GRAPHS = YES +UML_LOOK = NO +TEMPLATE_RELATIONS = NO +INCLUDE_GRAPH = YES +INCLUDED_BY_GRAPH = YES +CALL_GRAPH = YES +GRAPHICAL_HIERARCHY = YES +DIRECTORY_GRAPH = YES +DOT_IMAGE_FORMAT = png +DOT_PATH = +DOTFILE_DIRS = 
+MAX_DOT_GRAPH_WIDTH = 1024 +MAX_DOT_GRAPH_HEIGHT = 1024 +MAX_DOT_GRAPH_DEPTH = 1000 +DOT_TRANSPARENT = NO +DOT_MULTI_TARGETS = NO +GENERATE_LEGEND = YES +DOT_CLEANUP = YES +#--------------------------------------------------------------------------- +# Configuration::additions related to the search engine +#--------------------------------------------------------------------------- +SEARCHENGINE = NO diff --git a/maintenance/FiveUpgrade.inc b/maintenance/FiveUpgrade.inc new file mode 100644 index 00000000..7caf6810 --- /dev/null +++ b/maintenance/FiveUpgrade.inc @@ -0,0 +1,1214 @@ +<?php + +require_once( 'cleanupDupes.inc' ); +require_once( 'userDupes.inc' ); +require_once( 'updaters.inc' ); + +define( 'MW_UPGRADE_COPY', false ); +define( 'MW_UPGRADE_ENCODE', true ); +define( 'MW_UPGRADE_NULL', null ); +define( 'MW_UPGRADE_CALLBACK', null ); // for self-documentation only + +class FiveUpgrade { + function FiveUpgrade() { + global $wgDatabase; + $this->conversionTables = $this->prepareWindows1252(); + + $this->dbw =& $this->newConnection(); + $this->dbr =& $this->streamConnection(); + + $this->cleanupSwaps = array(); + $this->emailAuth = false; # don't preauthenticate emails + $this->maxLag = 10; # if slaves are lagged more than 10 secs, wait + } + + function doing( $step ) { + return is_null( $this->step ) || $step == $this->step; + } + + function upgrade( $step ) { + $this->step = $step; + + $tables = array( + 'page', + 'links', + 'user', + 'image', + 'oldimage', + 'watchlist', + 'logging', + 'archive', + 'imagelinks', + 'categorylinks', + 'ipblocks', + 'recentchanges', + 'querycache' ); + foreach( $tables as $table ) { + if( $this->doing( $table ) ) { + $method = 'upgrade' . ucfirst( $table ); + $this->$method(); + } + } + + if( $this->doing( 'cleanup' ) ) { + $this->upgradeCleanup(); + } + } + + + /** + * Open a connection to the master server with the admin rights. 
+ * @return Database + * @access private + */ + function &newConnection() { + global $wgDBadminuser, $wgDBadminpassword; + global $wgDBserver, $wgDBname; + $db =& new Database( $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname ); + return $db; + } + + /** + * Open a second connection to the master server, with buffering off. + * This will let us stream large datasets in and write in chunks on the + * other end. + * @return Database + * @access private + */ + function &streamConnection() { + $timeout = 3600 * 24; + $db =& $this->newConnection(); + $db->bufferResults( false ); + $db->query( "SET net_read_timeout=$timeout" ); + $db->query( "SET net_write_timeout=$timeout" ); + return $db; + } + + /** + * Prepare a conversion array for converting Windows Code Page 1252 to + * UTF-8. This should provide proper conversion of text that was miscoded + * as Windows-1252 by naughty user-agents, and doesn't rely on an outside + * iconv library. + * + * @return array + * @access private + */ + function prepareWindows1252() { + # Mappings from: + # http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT + static $cp1252 = array( + 0x80 => 0x20AC, #EURO SIGN + 0x81 => UNICODE_REPLACEMENT, + 0x82 => 0x201A, #SINGLE LOW-9 QUOTATION MARK + 0x83 => 0x0192, #LATIN SMALL LETTER F WITH HOOK + 0x84 => 0x201E, #DOUBLE LOW-9 QUOTATION MARK + 0x85 => 0x2026, #HORIZONTAL ELLIPSIS + 0x86 => 0x2020, #DAGGER + 0x87 => 0x2021, #DOUBLE DAGGER + 0x88 => 0x02C6, #MODIFIER LETTER CIRCUMFLEX ACCENT + 0x89 => 0x2030, #PER MILLE SIGN + 0x8A => 0x0160, #LATIN CAPITAL LETTER S WITH CARON + 0x8B => 0x2039, #SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x8C => 0x0152, #LATIN CAPITAL LIGATURE OE + 0x8D => UNICODE_REPLACEMENT, + 0x8E => 0x017D, #LATIN CAPITAL LETTER Z WITH CARON + 0x8F => UNICODE_REPLACEMENT, + 0x90 => UNICODE_REPLACEMENT, + 0x91 => 0x2018, #LEFT SINGLE QUOTATION MARK + 0x92 => 0x2019, #RIGHT SINGLE QUOTATION MARK + 0x93 => 0x201C, #LEFT DOUBLE QUOTATION MARK + 0x94 
=> 0x201D, #RIGHT DOUBLE QUOTATION MARK + 0x95 => 0x2022, #BULLET + 0x96 => 0x2013, #EN DASH + 0x97 => 0x2014, #EM DASH + 0x98 => 0x02DC, #SMALL TILDE + 0x99 => 0x2122, #TRADE MARK SIGN + 0x9A => 0x0161, #LATIN SMALL LETTER S WITH CARON + 0x9B => 0x203A, #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x9C => 0x0153, #LATIN SMALL LIGATURE OE + 0x9D => UNICODE_REPLACEMENT, + 0x9E => 0x017E, #LATIN SMALL LETTER Z WITH CARON + 0x9F => 0x0178, #LATIN CAPITAL LETTER Y WITH DIAERESIS + ); + $pairs = array(); + for( $i = 0; $i < 0x100; $i++ ) { + $unicode = isset( $cp1252[$i] ) ? $cp1252[$i] : $i; + $pairs[chr( $i )] = codepointToUtf8( $unicode ); + } + return $pairs; + } + + /** + * Convert from 8-bit Windows-1252 to UTF-8 if necessary. + * @param string $text + * @return string + * @access private + */ + function conv( $text ) { + global $wgUseLatin1; + return is_null( $text ) + ? null + : ( $wgUseLatin1 + ? strtr( $text, $this->conversionTables ) + : $text ); + } + + /** + * Dump timestamp and message to output + * @param string $message + * @access private + */ + function log( $message ) { + global $wgDBname; + echo $wgDBname . ' ' . wfTimestamp( TS_DB ) . ': ' . $message . "\n"; + flush(); + } + + /** + * Initialize the chunked-insert system. + * Rows will be inserted in chunks of the given number, rather + * than in a giant INSERT...SELECT query, to keep the serialized + * MySQL database replication from getting hung up. This way other + * things can be going on during conversion without waiting for + * slaves to catch up as badly. + * + * @param int $chunksize Number of rows to insert at once + * @param int $final Total expected number of rows / id of last row, + * used for progress reports. 
+ * @param string $table to insert on + * @param string $fname function name to report in SQL + * @access private + */ + function setChunkScale( $chunksize, $final, $table, $fname ) { + $this->chunkSize = $chunksize; + $this->chunkFinal = $final; + $this->chunkCount = 0; + $this->chunkStartTime = wfTime(); + $this->chunkOptions = array( 'IGNORE' ); + $this->chunkTable = $table; + $this->chunkFunction = $fname; + } + + /** + * Chunked inserts: perform an insert if we've reached the chunk limit. + * Prints a progress report with estimated completion time. + * @param array &$chunk -- This will be emptied if an insert is done. + * @param int $key A key identifier to use in progress estimation in + * place of the number of rows inserted. Use this if + * you provided a max key number instead of a count + * as the final chunk number in setChunkScale() + * @access private + */ + function addChunk( &$chunk, $key = null ) { + if( count( $chunk ) >= $this->chunkSize ) { + $this->insertChunk( $chunk ); + + $this->chunkCount += count( $chunk ); + $now = wfTime(); + $delta = $now - $this->chunkStartTime; + $rate = $this->chunkCount / $delta; + + if( is_null( $key ) ) { + $completed = $this->chunkCount; + } else { + $completed = $key; + } + $portion = $completed / $this->chunkFinal; + + $estimatedTotalTime = $delta / $portion; + $eta = $this->chunkStartTime + $estimatedTotalTime; + + printf( "%s: %6.2f%% done on %s; ETA %s [%d/%d] %.2f/sec\n", + wfTimestamp( TS_DB, intval( $now ) ), + $portion * 100.0, + $this->chunkTable, + wfTimestamp( TS_DB, intval( $eta ) ), + $completed, + $this->chunkFinal, + $rate ); + flush(); + + $chunk = array(); + } + } + + /** + * Chunked inserts: perform an insert unconditionally, at the end, and log. + * @param array &$chunk -- This will be emptied if an insert is done. 
+ * @access private + */ + function lastChunk( &$chunk ) { + $n = count( $chunk ); + if( $n > 0 ) { + $this->insertChunk( $chunk ); + } + $this->log( "100.00% done on $this->chunkTable (last chunk $n rows)." ); + } + + /** + * Chunked inserts: perform an insert. + * @param array &$chunk -- This will be emptied if an insert is done. + * @access private + */ + function insertChunk( &$chunk ) { + // Give slaves a chance to catch up + wfWaitForSlaves( $this->maxLag ); + $this->dbw->insert( $this->chunkTable, $chunk, $this->chunkFunction, $this->chunkOptions ); + } + + + /** + * Copy and transcode a table to table_temp. + * @param string $name Base name of the source table + * @param string $tabledef CREATE TABLE definition, w/ $1 for the name + * @param array $fields set of destination fields to these constants: + * MW_UPGRADE_COPY - straight copy + * MW_UPGRADE_ENCODE - for old Latin1 wikis, conv to UTF-8 + * MW_UPGRADE_NULL - just put NULL + * @param callable $callback An optional callback to modify the data + * or perform other processing. Func should be + * ( object $row, array $copy ) and return $copy + * @access private + */ + function copyTable( $name, $tabledef, $fields, $callback = null ) { + $fname = 'FiveUpgrade::copyTable'; + + $name_temp = $name . '_temp'; + $this->log( "Migrating $name table to $name_temp..." ); + + $table = $this->dbw->tableName( $name ); + $table_temp = $this->dbw->tableName( $name_temp ); + + // Create temporary table; we're going to copy everything in there, + // then at the end rename the final tables into place. + $def = str_replace( '$1', $table_temp, $tabledef ); + $this->dbw->query( $def, $fname ); + + $numRecords = $this->dbw->selectField( $name, 'COUNT(*)', '', $fname ); + $this->setChunkScale( 100, $numRecords, $name_temp, $fname ); + + // Pull all records from the second, streaming database connection. 
+ $sourceFields = array_keys( array_filter( $fields, + create_function( '$x', 'return $x !== MW_UPGRADE_NULL;' ) ) ); + $result = $this->dbr->select( $name, + $sourceFields, + '', + $fname ); + + $add = array(); + while( $row = $this->dbr->fetchObject( $result ) ) { + $copy = array(); + foreach( $fields as $field => $source ) { + if( $source === MW_UPGRADE_COPY ) { + $copy[$field] = $row->$field; + } elseif( $source === MW_UPGRADE_ENCODE ) { + $copy[$field] = $this->conv( $row->$field ); + } elseif( $source === MW_UPGRADE_NULL ) { + $copy[$field] = null; + } else { + $this->log( "Unknown field copy type: $field => $source" ); + } + } + if( is_callable( $callback ) ) { + $copy = call_user_func( $callback, $row, $copy ); + } + $add[] = $copy; + $this->addChunk( $add ); + } + $this->lastChunk( $add ); + $this->dbr->freeResult( $result ); + + $this->log( "Done converting $name." ); + $this->cleanupSwaps[] = $name; + } + + function upgradePage() { + $fname = "FiveUpgrade::upgradePage"; + $chunksize = 100; + + if( $this->dbw->tableExists( 'page' ) ) { + $this->log( 'Page table already exists; aborting.' ); + die( -1 ); + } + + $this->log( "Checking cur table for unique title index and applying if necessary" ); + checkDupes( true ); + + $this->log( "...converting from cur/old to page/revision/text DB structure." ); + + extract( $this->dbw->tableNames( 'cur', 'old', 'page', 'revision', 'text' ) ); + + $this->log( "Creating page and revision tables..." 
); + $this->dbw->query("CREATE TABLE $page ( + page_id int(8) unsigned NOT NULL auto_increment, + page_namespace int NOT NULL, + page_title varchar(255) binary NOT NULL, + page_restrictions tinyblob NOT NULL default '', + page_counter bigint(20) unsigned NOT NULL default '0', + page_is_redirect tinyint(1) unsigned NOT NULL default '0', + page_is_new tinyint(1) unsigned NOT NULL default '0', + page_random real unsigned NOT NULL, + page_touched char(14) binary NOT NULL default '', + page_latest int(8) unsigned NOT NULL, + page_len int(8) unsigned NOT NULL, + + PRIMARY KEY page_id (page_id), + UNIQUE INDEX name_title (page_namespace,page_title), + INDEX (page_random), + INDEX (page_len) + ) TYPE=InnoDB", $fname ); + $this->dbw->query("CREATE TABLE $revision ( + rev_id int(8) unsigned NOT NULL auto_increment, + rev_page int(8) unsigned NOT NULL, + rev_text_id int(8) unsigned NOT NULL, + rev_comment tinyblob NOT NULL default '', + rev_user int(5) unsigned NOT NULL default '0', + rev_user_text varchar(255) binary NOT NULL default '', + rev_timestamp char(14) binary NOT NULL default '', + rev_minor_edit tinyint(1) unsigned NOT NULL default '0', + rev_deleted tinyint(1) unsigned NOT NULL default '0', + + PRIMARY KEY rev_page_id (rev_page, rev_id), + UNIQUE INDEX rev_id (rev_id), + INDEX rev_timestamp (rev_timestamp), + INDEX page_timestamp (rev_page,rev_timestamp), + INDEX user_timestamp (rev_user,rev_timestamp), + INDEX usertext_timestamp (rev_user_text,rev_timestamp) + ) TYPE=InnoDB", $fname ); + + $maxold = intval( $this->dbw->selectField( 'old', 'max(old_id)', '', $fname ) ); + $this->log( "Last old record is {$maxold}" ); + + global $wgLegacySchemaConversion; + if( $wgLegacySchemaConversion ) { + // Create HistoryBlobCurStub entries. + // Text will be pulled from the leftover 'cur' table at runtime. 
+ echo "......Moving metadata from cur; using blob references to text in cur table.\n"; + $cur_text = "concat('O:18:\"historyblobcurstub\":1:{s:6:\"mCurId\";i:',cur_id,';}')"; + $cur_flags = "'object'"; + } else { + // Copy all cur text in immediately: this may take longer but avoids + // having to keep an extra table around. + echo "......Moving text from cur.\n"; + $cur_text = 'cur_text'; + $cur_flags = "''"; + } + + $maxcur = $this->dbw->selectField( 'cur', 'max(cur_id)', '', $fname ); + $this->log( "Last cur entry is $maxcur" ); + + /** + * Copy placeholder records for each page's current version into old + * Don't do any conversion here; text records are converted at runtime + * based on the flags (and may be originally binary!) while the meta + * fields will be converted in the old -> rev and cur -> page steps. + */ + $this->setChunkScale( $chunksize, $maxcur, 'old', $fname ); + $result = $this->dbr->query( + "SELECT cur_id, cur_namespace, cur_title, $cur_text AS text, cur_comment, + cur_user, cur_user_text, cur_timestamp, cur_minor_edit, $cur_flags AS flags + FROM $cur + ORDER BY cur_id", $fname ); + $add = array(); + while( $row = $this->dbr->fetchObject( $result ) ) { + $add[] = array( + 'old_namespace' => $row->cur_namespace, + 'old_title' => $row->cur_title, + 'old_text' => $row->text, + 'old_comment' => $row->cur_comment, + 'old_user' => $row->cur_user, + 'old_user_text' => $row->cur_user_text, + 'old_timestamp' => $row->cur_timestamp, + 'old_minor_edit' => $row->cur_minor_edit, + 'old_flags' => $row->flags ); + $this->addChunk( $add, $row->cur_id ); + } + $this->lastChunk( $add ); + $this->dbr->freeResult( $result ); + + /** + * Copy revision metadata from old into revision. + * We'll also do UTF-8 conversion of usernames and comments. 
+ */ + #$newmaxold = $this->dbw->selectField( 'old', 'max(old_id)', '', $fname ); + #$this->setChunkScale( $chunksize, $newmaxold, 'revision', $fname ); + #$countold = $this->dbw->selectField( 'old', 'count(old_id)', '', $fname ); + $countold = $this->dbw->selectField( 'old', 'max(old_id)', '', $fname ); + $this->setChunkScale( $chunksize, $countold, 'revision', $fname ); + + $this->log( "......Setting up revision table." ); + $result = $this->dbr->query( + "SELECT old_id, cur_id, old_comment, old_user, old_user_text, + old_timestamp, old_minor_edit + FROM $old,$cur WHERE old_namespace=cur_namespace AND old_title=cur_title", + $fname ); + + $add = array(); + while( $row = $this->dbr->fetchObject( $result ) ) { + $add[] = array( + 'rev_id' => $row->old_id, + 'rev_page' => $row->cur_id, + 'rev_text_id' => $row->old_id, + 'rev_comment' => $this->conv( $row->old_comment ), + 'rev_user' => $row->old_user, + 'rev_user_text' => $this->conv( $row->old_user_text ), + 'rev_timestamp' => $row->old_timestamp, + 'rev_minor_edit' => $row->old_minor_edit ); + $this->addChunk( $add ); + } + $this->lastChunk( $add ); + $this->dbr->freeResult( $result ); + + + /** + * Copy page metadata from cur into page. + * We'll also do UTF-8 conversion of titles. + */ + $this->log( "......Setting up page table." 
); + $this->setChunkScale( $chunksize, $maxcur, 'page', $fname ); + $result = $this->dbr->query( " + SELECT cur_id, cur_namespace, cur_title, cur_restrictions, cur_counter, cur_is_redirect, cur_is_new, + cur_random, cur_touched, rev_id, LENGTH(cur_text) AS len + FROM $cur,$revision + WHERE cur_id=rev_page AND rev_timestamp=cur_timestamp AND rev_id > {$maxold} + ORDER BY cur_id", $fname ); + $add = array(); + while( $row = $this->dbr->fetchObject( $result ) ) { + $add[] = array( + 'page_id' => $row->cur_id, + 'page_namespace' => $row->cur_namespace, + 'page_title' => $this->conv( $row->cur_title ), + 'page_restrictions' => $row->cur_restrictions, + 'page_counter' => $row->cur_counter, + 'page_is_redirect' => $row->cur_is_redirect, + 'page_is_new' => $row->cur_is_new, + 'page_random' => $row->cur_random, + 'page_touched' => $this->dbw->timestamp(), + 'page_latest' => $row->rev_id, + 'page_len' => $row->len ); + #$this->addChunk( $add, $row->cur_id ); + $this->addChunk( $add ); + } + $this->lastChunk( $add ); + $this->dbr->freeResult( $result ); + + $this->log( "...done with cur/old -> page/revision." ); + } + + function upgradeLinks() { + $fname = 'FiveUpgrade::upgradeLinks'; + $chunksize = 200; + extract( $this->dbw->tableNames( 'links', 'brokenlinks', 'pagelinks', 'cur' ) ); + + $this->log( 'Checking for interwiki table change in case of bogus items...' ); + if( $this->dbw->fieldExists( 'interwiki', 'iw_trans' ) ) { + $this->log( 'interwiki has iw_trans.' ); + } else { + $this->log( 'adding iw_trans...' ); + dbsource( 'maintenance/archives/patch-interwiki-trans.sql', $this->dbw ); + $this->log( 'added iw_trans.' ); + } + + $this->log( 'Creating pagelinks table...' ); + $this->dbw->query( " +CREATE TABLE $pagelinks ( + -- Key to the page_id of the page containing the link. + pl_from int(8) unsigned NOT NULL default '0', + + -- Key to page_namespace/page_title of the target page. 
+ -- The target page may or may not exist, and due to renames + -- and deletions may refer to different page records as time + -- goes by. + pl_namespace int NOT NULL default '0', + pl_title varchar(255) binary NOT NULL default '', + + UNIQUE KEY pl_from(pl_from,pl_namespace,pl_title), + KEY (pl_namespace,pl_title) + +) TYPE=InnoDB" ); + + $this->log( 'Importing live links -> pagelinks' ); + $nlinks = $this->dbw->selectField( 'links', 'count(*)', '', $fname ); + if( $nlinks ) { + $this->setChunkScale( $chunksize, $nlinks, 'pagelinks', $fname ); + $result = $this->dbr->query( " + SELECT l_from,cur_namespace,cur_title + FROM $links, $cur + WHERE l_to=cur_id", $fname ); + $add = array(); + while( $row = $this->dbr->fetchObject( $result ) ) { + $add[] = array( + 'pl_from' => $row->l_from, + 'pl_namespace' => $row->cur_namespace, + 'pl_title' => $this->conv( $row->cur_title ) ); + $this->addChunk( $add ); + } + $this->lastChunk( $add ); + } else { + $this->log( 'no links!' ); + } + + $this->log( 'Importing brokenlinks -> pagelinks' ); + $nbrokenlinks = $this->dbw->selectField( 'brokenlinks', 'count(*)', '', $fname ); + if( $nbrokenlinks ) { + $this->setChunkScale( $chunksize, $nbrokenlinks, 'pagelinks', $fname ); + $result = $this->dbr->query( + "SELECT bl_from, bl_to FROM $brokenlinks", + $fname ); + $add = array(); + while( $row = $this->dbr->fetchObject( $result ) ) { + $pagename = $this->conv( $row->bl_to ); + $title = Title::newFromText( $pagename ); + if( is_null( $title ) ) { + $this->log( "** invalid brokenlink: $row->bl_from -> '$pagename' (converted from '$row->bl_to')" ); + } else { + $add[] = array( + 'pl_from' => $row->bl_from, + 'pl_namespace' => $title->getNamespace(), + 'pl_title' => $title->getDBkey() ); + $this->addChunk( $add ); + } + } + $this->lastChunk( $add ); + } else { + $this->log( 'no brokenlinks!' ); + } + + $this->log( 'Done with links.' 
); + } + + function upgradeUser() { + // Apply unique index, if necessary: + $duper = new UserDupes( $this->dbw ); + if( $duper->hasUniqueIndex() ) { + $this->log( "Already have unique user_name index." ); + } else { + $this->log( "Clearing user duplicates..." ); + if( !$duper->clearDupes() ) { + $this->log( "WARNING: Duplicate user accounts, may explode!" ); + } + } + + $tabledef = <<<END +CREATE TABLE $1 ( + user_id int(5) unsigned NOT NULL auto_increment, + user_name varchar(255) binary NOT NULL default '', + user_real_name varchar(255) binary NOT NULL default '', + user_password tinyblob NOT NULL default '', + user_newpassword tinyblob NOT NULL default '', + user_email tinytext NOT NULL default '', + user_options blob NOT NULL default '', + user_touched char(14) binary NOT NULL default '', + user_token char(32) binary NOT NULL default '', + user_email_authenticated CHAR(14) BINARY, + user_email_token CHAR(32) BINARY, + user_email_token_expires CHAR(14) BINARY, + + PRIMARY KEY user_id (user_id), + UNIQUE INDEX user_name (user_name), + INDEX (user_email_token) + +) TYPE=InnoDB +END; + $fields = array( + 'user_id' => MW_UPGRADE_COPY, + 'user_name' => MW_UPGRADE_ENCODE, + 'user_real_name' => MW_UPGRADE_ENCODE, + 'user_password' => MW_UPGRADE_COPY, + 'user_newpassword' => MW_UPGRADE_COPY, + 'user_email' => MW_UPGRADE_ENCODE, + 'user_options' => MW_UPGRADE_ENCODE, + 'user_touched' => MW_UPGRADE_CALLBACK, + 'user_token' => MW_UPGRADE_COPY, + 'user_email_authenticated' => MW_UPGRADE_CALLBACK, + 'user_email_token' => MW_UPGRADE_NULL, + 'user_email_token_expires' => MW_UPGRADE_NULL ); + $this->copyTable( 'user', $tabledef, $fields, + array( &$this, 'userCallback' ) ); + } + + function userCallback( $row, $copy ) { + $now = $this->dbw->timestamp(); + $copy['user_touched'] = $now; + $copy['user_email_authenticated'] = $this->emailAuth ? 
$now : null; + return $copy; + } + + function upgradeImage() { + $tabledef = <<<END +CREATE TABLE $1 ( + img_name varchar(255) binary NOT NULL default '', + img_size int(8) unsigned NOT NULL default '0', + img_width int(5) NOT NULL default '0', + img_height int(5) NOT NULL default '0', + img_metadata mediumblob NOT NULL, + img_bits int(3) NOT NULL default '0', + img_media_type ENUM("UNKNOWN", "BITMAP", "DRAWING", "AUDIO", "VIDEO", "MULTIMEDIA", "OFFICE", "TEXT", "EXECUTABLE", "ARCHIVE") default NULL, + img_major_mime ENUM("unknown", "application", "audio", "image", "text", "video", "message", "model", "multipart") NOT NULL default "unknown", + img_minor_mime varchar(32) NOT NULL default "unknown", + img_description tinyblob NOT NULL default '', + img_user int(5) unsigned NOT NULL default '0', + img_user_text varchar(255) binary NOT NULL default '', + img_timestamp char(14) binary NOT NULL default '', + + PRIMARY KEY img_name (img_name), + INDEX img_size (img_size), + INDEX img_timestamp (img_timestamp) +) TYPE=InnoDB +END; + $fields = array( + 'img_name' => MW_UPGRADE_ENCODE, + 'img_size' => MW_UPGRADE_COPY, + 'img_width' => MW_UPGRADE_CALLBACK, + 'img_height' => MW_UPGRADE_CALLBACK, + 'img_metadata' => MW_UPGRADE_CALLBACK, + 'img_bits' => MW_UPGRADE_CALLBACK, + 'img_media_type' => MW_UPGRADE_CALLBACK, + 'img_major_mime' => MW_UPGRADE_CALLBACK, + 'img_minor_mime' => MW_UPGRADE_CALLBACK, + 'img_description' => MW_UPGRADE_ENCODE, + 'img_user' => MW_UPGRADE_COPY, + 'img_user_text' => MW_UPGRADE_ENCODE, + 'img_timestamp' => MW_UPGRADE_COPY ); + $this->copyTable( 'image', $tabledef, $fields, + array( &$this, 'imageCallback' ) ); + } + + function imageCallback( $row, $copy ) { + global $options; + if( !isset( $options['noimage'] ) ) { + // Fill in the new image info fields + $info = $this->imageInfo( $row->img_name ); + + $copy['img_width' ] = $info['width']; + $copy['img_height' ] = $info['height']; + $copy['img_metadata' ] = ""; // loaded on-demand + $copy['img_bits' ] 
= $info['bits']; + $copy['img_media_type'] = $info['media']; + $copy['img_major_mime'] = $info['major']; + $copy['img_minor_mime'] = $info['minor']; + } + + // If doing UTF8 conversion the file must be renamed + $this->renameFile( $row->img_name, 'wfImageDir' ); + + return $copy; + } + + function imageInfo( $name, $subdirCallback='wfImageDir', $basename = null ) { + if( is_null( $basename ) ) $basename = $name; + $dir = call_user_func( $subdirCallback, $basename ); + $filename = $dir . '/' . $name; + $info = array( + 'width' => 0, + 'height' => 0, + 'bits' => 0, + 'media' => '', + 'major' => '', + 'minor' => '' ); + + $magic =& wfGetMimeMagic(); + $mime = $magic->guessMimeType( $filename, true ); + list( $info['major'], $info['minor'] ) = explode( '/', $mime ); + + $info['media'] = $magic->getMediaType( $filename, $mime ); + + # Height and width + $gis = false; + if( $mime == 'image/svg' ) { + $gis = wfGetSVGsize( $filename ); + } elseif( $magic->isPHPImageType( $mime ) ) { + $gis = getimagesize( $filename ); + } else { + $this->log( "Surprising mime type: $mime" ); + } + if( $gis ) { + $info['width' ] = $gis[0]; + $info['height'] = $gis[1]; + } + if( isset( $gis['bits'] ) ) { + $info['bits'] = $gis['bits']; + } + + return $info; + } + + + /** + * Truncate a table. + * @param string $table The table name to be truncated + */ + function clearTable( $table ) { + print "Clearing $table...\n"; + $tableName = $this->db->tableName( $table ); + $this->db->query( 'TRUNCATE $tableName' ); + } + + /** + * Rename a given image or archived image file to the converted filename, + * leaving a symlink for URL compatibility. 
+ * + * @param string $oldname pre-conversion filename + * @param string $subdirCallback callback (passed to call_user_func) that maps a base filename to its directory + * @param string $basename pre-conversion base filename for dir hashing, if an archive + * @return mixed the converted filename once the file has been renamed; false when conversion leaves the name unchanged or rename() fails + * @access private + */ + function renameFile( $oldname, $subdirCallback='wfImageDir', $basename=null ) { + $newname = $this->conv( $oldname ); + if( $newname == $oldname ) { + // No need to rename; another field triggered this row. + return false; + } + + if( is_null( $basename ) ) $basename = $oldname; + $ubasename = $this->conv( $basename ); + $oldpath = call_user_func( $subdirCallback, $basename ) . '/' . $oldname; + $newpath = call_user_func( $subdirCallback, $ubasename ) . '/' . $newname; + + $this->log( "$oldpath -> $newpath" ); + if( rename( $oldpath, $newpath ) ) { + $relpath = $this->relativize( $newpath, dirname( $oldpath ) ); + if( !symlink( $relpath, $oldpath ) ) { + $this->log( "... symlink failed!" ); + } + return $newname; + } else { + $this->log( "... rename failed!" ); + return false; + } + } + + /** + * Generate a relative path name to the given file. + * Assumes Unix-style paths, separators, and semantics. + * + * @param string $path Absolute destination path including target filename + * @param string $from Absolute source path, directory only + * @return string + * @access private + * @static + */ + function relativize( $path, $from ) { + $pieces = explode( '/', dirname( $path ) ); + $against = explode( '/', $from ); + + // Trim off common prefix + while( count( $pieces ) && count( $against ) + && $pieces[0] == $against[0] ) { + array_shift( $pieces ); + array_shift( $against ); + } + + // relative dots to bump us to the parent + while( count( $against ) ) { + array_unshift( $pieces, '..' 
); + array_shift( $against ); + } + + array_push( $pieces, basename( $path ) ); + + return implode( '/', $pieces ); + } + + function upgradeOldImage() { + $tabledef = <<<END +CREATE TABLE $1 ( + -- Base filename: key to image.img_name + oi_name varchar(255) binary NOT NULL default '', + + -- Filename of the archived file. + -- This is generally a timestamp and '!' prepended to the base name. + oi_archive_name varchar(255) binary NOT NULL default '', + + -- Other fields as in image... + oi_size int(8) unsigned NOT NULL default 0, + oi_width int(5) NOT NULL default 0, + oi_height int(5) NOT NULL default 0, + oi_bits int(3) NOT NULL default 0, + oi_description tinyblob NOT NULL default '', + oi_user int(5) unsigned NOT NULL default '0', + oi_user_text varchar(255) binary NOT NULL default '', + oi_timestamp char(14) binary NOT NULL default '', + + INDEX oi_name (oi_name(10)) + +) TYPE=InnoDB; +END; + $fields = array( + 'oi_name' => MW_UPGRADE_ENCODE, + 'oi_archive_name' => MW_UPGRADE_ENCODE, + 'oi_size' => MW_UPGRADE_COPY, + 'oi_width' => MW_UPGRADE_CALLBACK, + 'oi_height' => MW_UPGRADE_CALLBACK, + 'oi_bits' => MW_UPGRADE_CALLBACK, + 'oi_description' => MW_UPGRADE_ENCODE, + 'oi_user' => MW_UPGRADE_COPY, + 'oi_user_text' => MW_UPGRADE_ENCODE, + 'oi_timestamp' => MW_UPGRADE_COPY ); + $this->copyTable( 'oldimage', $tabledef, $fields, + array( &$this, 'oldimageCallback' ) ); + } + + function oldimageCallback( $row, $copy ) { + global $options; + if( !isset( $options['noimage'] ) ) { + // Fill in the new image info fields + $info = $this->imageInfo( $row->oi_archive_name, 'wfImageArchiveDir', $row->oi_name ); + $copy['oi_width' ] = $info['width' ]; + $copy['oi_height'] = $info['height']; + $copy['oi_bits' ] = $info['bits' ]; + } + + // If doing UTF8 conversion the file must be renamed + $this->renameFile( $row->oi_archive_name, 'wfImageArchiveDir', $row->oi_name ); + + return $copy; + } + + + function upgradeWatchlist() { + $fname = 'FiveUpgrade::upgradeWatchlist'; + 
$chunksize = 100; + + extract( $this->dbw->tableNames( 'watchlist', 'watchlist_temp' ) ); + + $this->log( 'Migrating watchlist table to watchlist_temp...' ); + $this->dbw->query( +"CREATE TABLE $watchlist_temp ( + -- Key to user_id + wl_user int(5) unsigned NOT NULL, + + -- Key to page_namespace/page_title + -- Note that users may watch patches which do not exist yet, + -- or existed in the past but have been deleted. + wl_namespace int NOT NULL default '0', + wl_title varchar(255) binary NOT NULL default '', + + -- Timestamp when user was last sent a notification e-mail; + -- cleared when the user visits the page. + -- FIXME: add proper null support etc + wl_notificationtimestamp varchar(14) binary NOT NULL default '0', + + UNIQUE KEY (wl_user, wl_namespace, wl_title), + KEY namespace_title (wl_namespace,wl_title) + +) TYPE=InnoDB;", $fname ); + + // Fix encoding for Latin-1 upgrades, add some fields, + // and double article to article+talk pairs + $numwatched = $this->dbw->selectField( 'watchlist', 'count(*)', '', $fname ); + + $this->setChunkScale( $chunksize, $numwatched * 2, 'watchlist_temp', $fname ); + $result = $this->dbr->select( 'watchlist', + array( + 'wl_user', + 'wl_namespace', + 'wl_title' ), + '', + $fname ); + + $add = array(); + while( $row = $this->dbr->fetchObject( $result ) ) { + $now = $this->dbw->timestamp(); + $add[] = array( + 'wl_user' => $row->wl_user, + 'wl_namespace' => Namespace::getSubject( $row->wl_namespace ), + 'wl_title' => $this->conv( $row->wl_title ), + 'wl_notificationtimestamp' => '0' ); + $this->addChunk( $add ); + + $add[] = array( + 'wl_user' => $row->wl_user, + 'wl_namespace' => Namespace::getTalk( $row->wl_namespace ), + 'wl_title' => $this->conv( $row->wl_title ), + 'wl_notificationtimestamp' => '0' ); + $this->addChunk( $add ); + } + $this->lastChunk( $add ); + $this->dbr->freeResult( $result ); + + $this->log( 'Done converting watchlist.' 
); + $this->cleanupSwaps[] = 'watchlist'; + } + + function upgradeLogging() { + $tabledef = <<<END +CREATE TABLE $1 ( + -- Symbolic keys for the general log type and the action type + -- within the log. The output format will be controlled by the + -- action field, but only the type controls categorization. + log_type char(10) NOT NULL default '', + log_action char(10) NOT NULL default '', + + -- Timestamp. Duh. + log_timestamp char(14) NOT NULL default '19700101000000', + + -- The user who performed this action; key to user_id + log_user int unsigned NOT NULL default 0, + + -- Key to the page affected. Where a user is the target, + -- this will point to the user page. + log_namespace int NOT NULL default 0, + log_title varchar(255) binary NOT NULL default '', + + -- Freeform text. Interpreted as edit history comments. + log_comment varchar(255) NOT NULL default '', + + -- LF separated list of miscellaneous parameters + log_params blob NOT NULL default '', + + KEY type_time (log_type, log_timestamp), + KEY user_time (log_user, log_timestamp), + KEY page_time (log_namespace, log_title, log_timestamp) + +) TYPE=InnoDB +END; + $fields = array( + 'log_type' => MW_UPGRADE_COPY, + 'log_action' => MW_UPGRADE_COPY, + 'log_timestamp' => MW_UPGRADE_COPY, + 'log_user' => MW_UPGRADE_COPY, + 'log_namespace' => MW_UPGRADE_COPY, + 'log_title' => MW_UPGRADE_ENCODE, + 'log_comment' => MW_UPGRADE_ENCODE, + 'log_params' => MW_UPGRADE_ENCODE ); + $this->copyTable( 'logging', $tabledef, $fields ); + } + + function upgradeArchive() { + $tabledef = <<<END +CREATE TABLE $1 ( + ar_namespace int NOT NULL default '0', + ar_title varchar(255) binary NOT NULL default '', + ar_text mediumblob NOT NULL default '', + + ar_comment tinyblob NOT NULL default '', + ar_user int(5) unsigned NOT NULL default '0', + ar_user_text varchar(255) binary NOT NULL, + ar_timestamp char(14) binary NOT NULL default '', + ar_minor_edit tinyint(1) NOT NULL default '0', + + ar_flags tinyblob NOT NULL default '', + + 
ar_rev_id int(8) unsigned, + ar_text_id int(8) unsigned, + + KEY name_title_timestamp (ar_namespace,ar_title,ar_timestamp) + +) TYPE=InnoDB +END; + $fields = array( + 'ar_namespace' => MW_UPGRADE_COPY, + 'ar_title' => MW_UPGRADE_ENCODE, + 'ar_text' => MW_UPGRADE_COPY, + 'ar_comment' => MW_UPGRADE_ENCODE, + 'ar_user' => MW_UPGRADE_COPY, + 'ar_user_text' => MW_UPGRADE_ENCODE, + 'ar_timestamp' => MW_UPGRADE_COPY, + 'ar_minor_edit' => MW_UPGRADE_COPY, + 'ar_flags' => MW_UPGRADE_COPY, + 'ar_rev_id' => MW_UPGRADE_NULL, + 'ar_text_id' => MW_UPGRADE_NULL ); + $this->copyTable( 'archive', $tabledef, $fields ); + } + + function upgradeImagelinks() { + global $wgUseLatin1; + if( $wgUseLatin1 ) { + $tabledef = <<<END +CREATE TABLE $1 ( + -- Key to page_id of the page containing the image / media link. + il_from int(8) unsigned NOT NULL default '0', + + -- Filename of target image. + -- This is also the page_title of the file's description page; + -- all such pages are in namespace 6 (NS_IMAGE). + il_to varchar(255) binary NOT NULL default '', + + UNIQUE KEY il_from(il_from,il_to), + KEY (il_to) + +) TYPE=InnoDB +END; + $fields = array( + 'il_from' => MW_UPGRADE_COPY, + 'il_to' => MW_UPGRADE_ENCODE ); + $this->copyTable( 'imagelinks', $tabledef, $fields ); + } + } + + function upgradeCategorylinks() { + global $wgUseLatin1; + if( $wgUseLatin1 ) { + $tabledef = <<<END +CREATE TABLE $1 ( + cl_from int(8) unsigned NOT NULL default '0', + cl_to varchar(255) binary NOT NULL default '', + cl_sortkey varchar(86) binary NOT NULL default '', + cl_timestamp timestamp NOT NULL, + + UNIQUE KEY cl_from(cl_from,cl_to), + KEY cl_sortkey(cl_to,cl_sortkey), + KEY cl_timestamp(cl_to,cl_timestamp) +) TYPE=InnoDB +END; + $fields = array( + 'cl_from' => MW_UPGRADE_COPY, + 'cl_to' => MW_UPGRADE_ENCODE, + 'cl_sortkey' => MW_UPGRADE_ENCODE, + 'cl_timestamp' => MW_UPGRADE_COPY ); + $this->copyTable( 'categorylinks', $tabledef, $fields ); + } + } + + function upgradeIpblocks() { + global $wgUseLatin1; + 
if( $wgUseLatin1 ) { + $tabledef = <<<END +CREATE TABLE $1 ( + ipb_id int(8) NOT NULL auto_increment, + ipb_address varchar(40) binary NOT NULL default '', + ipb_user int(8) unsigned NOT NULL default '0', + ipb_by int(8) unsigned NOT NULL default '0', + ipb_reason tinyblob NOT NULL default '', + ipb_timestamp char(14) binary NOT NULL default '', + ipb_auto tinyint(1) NOT NULL default '0', + ipb_expiry char(14) binary NOT NULL default '', + + PRIMARY KEY ipb_id (ipb_id), + INDEX ipb_address (ipb_address), + INDEX ipb_user (ipb_user) + +) TYPE=InnoDB +END; + $fields = array( + 'ipb_id' => MW_UPGRADE_COPY, + 'ipb_address' => MW_UPGRADE_COPY, + 'ipb_user' => MW_UPGRADE_COPY, + 'ipb_by' => MW_UPGRADE_COPY, + 'ipb_reason' => MW_UPGRADE_ENCODE, + 'ipb_timestamp' => MW_UPGRADE_COPY, + 'ipb_auto' => MW_UPGRADE_COPY, + 'ipb_expiry' => MW_UPGRADE_COPY ); + $this->copyTable( 'ipblocks', $tabledef, $fields ); + } + } + + function upgradeRecentchanges() { + // There's a format change in the namespace field + $tabledef = <<<END +CREATE TABLE $1 ( + rc_id int(8) NOT NULL auto_increment, + rc_timestamp varchar(14) binary NOT NULL default '', + rc_cur_time varchar(14) binary NOT NULL default '', + + rc_user int(10) unsigned NOT NULL default '0', + rc_user_text varchar(255) binary NOT NULL default '', + + rc_namespace int NOT NULL default '0', + rc_title varchar(255) binary NOT NULL default '', + + rc_comment varchar(255) binary NOT NULL default '', + rc_minor tinyint(3) unsigned NOT NULL default '0', + + rc_bot tinyint(3) unsigned NOT NULL default '0', + rc_new tinyint(3) unsigned NOT NULL default '0', + + rc_cur_id int(10) unsigned NOT NULL default '0', + rc_this_oldid int(10) unsigned NOT NULL default '0', + rc_last_oldid int(10) unsigned NOT NULL default '0', + + rc_type tinyint(3) unsigned NOT NULL default '0', + rc_moved_to_ns tinyint(3) unsigned NOT NULL default '0', + rc_moved_to_title varchar(255) binary NOT NULL default '', + + rc_patrolled tinyint(3) unsigned NOT NULL 
default '0', + + rc_ip char(15) NOT NULL default '', + + PRIMARY KEY rc_id (rc_id), + INDEX rc_timestamp (rc_timestamp), + INDEX rc_namespace_title (rc_namespace, rc_title), + INDEX rc_cur_id (rc_cur_id), + INDEX new_name_timestamp(rc_new,rc_namespace,rc_timestamp), + INDEX rc_ip (rc_ip) + +) TYPE=InnoDB +END; + $fields = array( + 'rc_id' => MW_UPGRADE_COPY, + 'rc_timestamp' => MW_UPGRADE_COPY, + 'rc_cur_time' => MW_UPGRADE_COPY, + 'rc_user' => MW_UPGRADE_COPY, + 'rc_user_text' => MW_UPGRADE_ENCODE, + 'rc_namespace' => MW_UPGRADE_COPY, + 'rc_title' => MW_UPGRADE_ENCODE, + 'rc_comment' => MW_UPGRADE_ENCODE, + 'rc_minor' => MW_UPGRADE_COPY, + 'rc_bot' => MW_UPGRADE_COPY, + 'rc_new' => MW_UPGRADE_COPY, + 'rc_cur_id' => MW_UPGRADE_COPY, + 'rc_this_oldid' => MW_UPGRADE_COPY, + 'rc_last_oldid' => MW_UPGRADE_COPY, + 'rc_type' => MW_UPGRADE_COPY, + 'rc_moved_to_ns' => MW_UPGRADE_COPY, + 'rc_moved_to_title' => MW_UPGRADE_ENCODE, + 'rc_patrolled' => MW_UPGRADE_COPY, + 'rc_ip' => MW_UPGRADE_COPY ); + $this->copyTable( 'recentchanges', $tabledef, $fields ); + } + + function upgradeQuerycache() { + // There's a format change in the namespace field + $tabledef = <<<END +CREATE TABLE $1 ( + -- A key name, generally the base name of of the special page. + qc_type char(32) NOT NULL, + + -- Some sort of stored value. Sizes, counts... + qc_value int(5) unsigned NOT NULL default '0', + + -- Target namespace+title + qc_namespace int NOT NULL default '0', + qc_title char(255) binary NOT NULL default '', + + KEY (qc_type,qc_value) + +) TYPE=InnoDB +END; + $fields = array( + 'qc_type' => MW_UPGRADE_COPY, + 'qc_value' => MW_UPGRADE_COPY, + 'qc_namespace' => MW_UPGRADE_COPY, + 'qc_title' => MW_UPGRADE_ENCODE ); + $this->copyTable( 'querycache', $tabledef, $fields ); + } + + /** + * Rename all our temporary tables into final place. + * We've left things in place so a read-only wiki can continue running + * on the old code during all this. 
+ */ + function upgradeCleanup() { + $this->renameTable( 'old', 'text' ); + + foreach( $this->cleanupSwaps as $table ) { + $this->swap( $table ); + } + } + + function renameTable( $from, $to ) { + $this->log( "Renaming $from to $to..." ); + + $fromtable = $this->dbw->tableName( $from ); + $totable = $this->dbw->tableName( $to ); + $this->dbw->query( "ALTER TABLE $fromtable RENAME TO $totable" ); + } + + function swap( $base ) { + $this->renameTable( $base, "{$base}_old" ); + $this->renameTable( "{$base}_temp", $base ); + } + +} + +?> diff --git a/maintenance/InitialiseMessages.inc b/maintenance/InitialiseMessages.inc new file mode 100644 index 00000000..189fbd25 --- /dev/null +++ b/maintenance/InitialiseMessages.inc @@ -0,0 +1,240 @@ +<?php +/** + * Script to initialise the MediaWiki namespace + * + * This script is included from update.php and install.php. Do not run it + * by itself. + * + * @deprecated + * @package MediaWiki + * @subpackage Maintenance + */ + +/** */ +function initialiseMessages( $overwrite = false, $messageArray = false ) { + global $wgContLang, $wgContLanguageCode; + global $wgContLangClass, $wgAllMessagesEn; + global $wgDisableLangConversion; + global $wgForceUIMsgAsContentMsg; + global $wgLanguageNames; + global $IP; + + # overwrite language conversion option so that all variants + # of the messages are initialised + $wgDisableLangConversion = false; + + if ( $messageArray ) { + $sortedArray = $messageArray; + } else { + $sortedArray = $wgAllMessagesEn; + } + + ksort( $sortedArray ); + $messages=array(); + + $variants = $wgContLang->getVariants(); + if(!in_array($wgContLanguageCode, $variants)) + $variants[]=$wgContLanguageCode; + + foreach ($variants as $v) { + $langclass = 'Language'. str_replace( '-', '_', ucfirst( $v ) ); + if( !class_exists($langclass) ) { + wfDie( "class $langclass not defined. perhaps you need to include the file $langclass.php in $wgContLangClass.php?" 
); + } + $lang = new $langclass; + + if($v==$wgContLanguageCode) + $suffix=''; + else + $suffix="/$v"; + foreach ($sortedArray as $key => $msg) { + $messages[$key.$suffix] = $lang->getMessage($key); + } + } + + require_once('languages/Names.php'); + + /* + initialize all messages in $wgForceUIMsgAsContentMsg for all + languages in Names.php + */ + if( is_array( $wgForceUIMsgAsContentMsg ) ) { + foreach( $wgForceUIMsgAsContentMsg as $uikey ) { + foreach( $wgLanguageNames as $code => $name) { + if( $code == $wgContLanguageCode ) + continue; + $msg = $wgContLang->getMessage( $uikey ); + if( $msg ) + $messages[$uikey. '/' . $code] = $msg; + } + } + } + initialiseMessagesReal( $overwrite, $messages ); +} + +/** + * Create or refresh the default message pages in the MediaWiki namespace. + * + * @param bool $overwrite when true, existing messages marked 'keep' (revision author is neither 'MediaWiki default' nor 'Template namespace initialisation script') are rewritten as well + * @param array $messageArray message key => text pairs; when false the keys of $wgAllMessagesEn are used and each text is fetched with wfMsgNoDBForContent() + */ +function initialiseMessagesReal( $overwrite = false, $messageArray = false ) { + global $wgContLang, $wgScript, $wgServer, $wgAllMessagesEn; + global $wgOut, $wgArticle, $wgUser; + global $wgMessageCache, $wgMemc, $wgDBname, $wgUseMemCached; + + # Initialise $wgOut and $wgUser for a command line script + $wgOut->disable(); + + $wgUser = new User; + $wgUser->setLoaded( true ); # Don't load from DB + $wgUser->setName( 'MediaWiki default' ); + + # Don't try to draw messages from the database we're initialising + $wgMessageCache->disable(); + $wgMessageCache->disableTransform(); + + $fname = 'initialiseMessages'; + $ns = NS_MEDIAWIKI; + # cur_user_text responsible for the modifications + # Don't change it unless you're prepared to update the DBs accordingly, otherwise the + # default messages won't be overwritten + $username = 'MediaWiki default'; + + + print "Initialising \"MediaWiki\" namespace...\n"; + + + $dbr =& wfGetDB( DB_SLAVE ); + $dbw =& wfGetDB( DB_MASTER ); + $page = $dbr->tableName( 'page' ); + $revision = $dbr->tableName( 'revision' ); + + $timestamp = wfTimestampNow(); + + #$sql = "SELECT cur_title,cur_is_new,cur_user_text FROM $cur WHERE cur_namespace=$ns AND cur_title IN("; + # Get keys from $wgAllMessagesEn, which is more complete than the local 
language + $first = true; + if ( $messageArray ) { + $sortedArray = $messageArray; + } else { + $sortedArray = $wgAllMessagesEn; + } + + ksort( $sortedArray ); + + # SELECT all existing messages + # Can't afford to be locking all rows for update, this script can take quite a long time to complete + $rows = array(); + $nitems = count($sortedArray); + $maxitems = $dbr->maxListLen(); + $pos = 0; + # Keep the message keys when chunking: array_chunk() renumbers them from 0 + # unless preserve_keys is true, and the loop below builds titles from the keys + if ($maxitems) + $chunks = array_chunk($sortedArray, $maxitems, true); + else + $chunks = array($sortedArray); + + foreach ($chunks as $chunk) { + $first = true; + $sql = "SELECT page_title,page_is_new,rev_user_text FROM $page, $revision WHERE + page_namespace=$ns AND rev_page=page_id AND page_title IN("; + + foreach ( $chunk as $key => $enMsg ) { + if ( $key == '' ) { + continue; // Skip odd members + } + if ( $first ) { + $first = false; + } else { + $sql .= ','; + } + $titleObj = Title::newFromText( $wgContLang->ucfirst( $key ) ); + $enctitle = $dbr->strencode($titleObj->getDBkey()); + $sql .= "'$enctitle'"; + } + + $sql .= ')'; + $res = $dbr->query( $sql ); + while ($row = $dbr->fetchObject($res)) + $rows[] = $row; + } + + # Read the results into an array + # Decide whether or not each one needs to be overwritten + $existingTitles = array(); + foreach ($rows as $row) { + if ( $row->rev_user_text != $username && $row->rev_user_text != 'Template namespace initialisation script' ) { + $existingTitles[$row->page_title] = 'keep'; + } else { + $existingTitles[$row->page_title] = 'chuck'; + } + } + + # Insert queries are done in one multi-row insert + # Here's the start of it: + $arr = array(); + $talk = $wgContLang->getNsText( NS_TALK ); + $mwtalk = $wgContLang->getNsText( NS_MEDIAWIKI_TALK ); + + # Merge these into a single transaction for speed + $dbw->begin(); + + # Process each message + foreach ( $sortedArray as $key => $enMsg ) { + if ( $key == '' ) { + continue; // Skip odd members + } + # Get message text + if ( $messageArray ) { + $message = $enMsg; + } else { + $message = 
wfMsgNoDBForContent( $key ); + } + $titleObj = Title::newFromText( $wgContLang->ucfirst( $key ), NS_MEDIAWIKI ); + $title = $titleObj->getDBkey(); + + # Update messages which already exist + if ( array_key_exists( $title, $existingTitles ) ) { + if ( $existingTitles[$title] == 'chuck' || $overwrite) { + # Don't bother writing a new revision if we're the same + # as the current text! + $revision = Revision::newFromTitle( $titleObj ); + if( is_null( $revision ) || $revision->getText() != $message ) { + $article = new Article( $titleObj ); + $article->quickEdit( $message ); + } + } + } else { + $article = new Article( $titleObj ); + $newid = $article->insertOn( $dbw ); + # FIXME: set restrictions + $revision = new Revision( array( + 'page' => $newid, + 'text' => $message, + 'user' => 0, + 'user_text' => $username, + 'comment' => '', + ) ); + $revid = $revision->insertOn( $dbw ); + $article->updateRevisionOn( $dbw, $revision ); + } + } + $dbw->commit(); + + # Clear the relevant memcached key + print 'Clearing message cache...'; + $wgMessageCache->clear(); + print "Done.\n"; +} + +/** + * Load serialized data from a file, discarding the first (header) line. + * + * @param string $filename file to read + * @return mixed result of unserialize() on everything after the first newline + */ +function loadLanguageFile( $filename ) { + $contents = file_get_contents( $filename ); + # Remove header line + $p = strpos( $contents, "\n" ) + 1; + $contents = substr( $contents, $p ); + # Unserialize + return unserialize( $contents ); +} + +/** + * Call doUpdate() on each entry of $wgDeferredUpdateList. + */ +function doUpdates() { + global $wgDeferredUpdateList; + foreach ( $wgDeferredUpdateList as $up ) { $up->doUpdate(); } +} +?> diff --git a/maintenance/Makefile b/maintenance/Makefile new file mode 100644 index 00000000..97f8b60b --- /dev/null +++ b/maintenance/Makefile @@ -0,0 +1,20 @@ +.PHONY: help test test-light +help: + # Run 'make test' to run the parser tests. + # Run 'make doc' to run the phpdoc generation. + # Run 'make doxydoc' (unsupported doxygen generation). + +test: + php parserTests.php + +test-light: + php parserTests.php --color=light + +doc: + php mwdocgen.php -all + echo 'Doc generation done. 
Look at ./docs/html/' + +doxydoc: + cd .. && doxygen maintenance/mwdoxygen.cfg + echo 'Doc generation done. Look at ./docs/html/' + diff --git a/maintenance/README b/maintenance/README new file mode 100644 index 00000000..9eb69ba8 --- /dev/null +++ b/maintenance/README @@ -0,0 +1,85 @@ +== MediaWiki Maintenance == + +The .sql scripts in this directory are not intended to be run standalone, +although this is appropriate in some cases, e.g. manual creation of blank tables +prior to an import. + +Most of the PHP scripts need to be run from the command line. Prior to doing so, +ensure that the LocalSettings.php file in the directory above points to the +proper installation. + +Certain scripts will require elevated access to the database. In order to +provide this, first create a MySQL user with "all" permissions on the wiki +database, and then place their username and password in an AdminSettings.php +file in the directory above. See AdminSettings.sample for specifics on this. + +=== Brief explanation of files === + +A lot of the files in this directory are PHP scripts used to perform various +maintenance tasks on the wiki database, e.g. rebuilding link tables, updating +the search indices, etc. The files in the "archives" directory are used to +upgrade the database schema when updating the software. Some schema definitions +for alternative (as yet unsupported) database management systems are stored +here too. + +The "storage" directory contains scripts and resources useful for working with +external storage clusters, which are not likely to be particularly useful to the +vast majority of installations. This directory does contain the compressOld +scripts, however, which can be useful for compacting old data. + +=== Maintenance scripts === + +As noted above, these should be run from the command line. Not all scripts are +listed, as some are Wikimedia-specific, and some are not applicable to most +installations. 
+ + changePassword.php + Reset the password of a specified user + + cleanupSpam.php + Mass-revert insertion of linkspam + + deleteOldRevisions.php + Erase old revisions of pages from the database + + dumpBackup.php + Backup dump script + + dumpHTML.php + Produce an HTML dump of a wiki + + importDump.php + XML dump importer + + importImages.php + Imports images into the wiki + + importTextFile.php + Imports the contents of a text file into a wiki page + + nukePage.php + Wipe a page and all revisions from the database + + reassignEdits.php + Reassign edits from one user to another + + rebuildImages.php + Update image metadata records + + rebuildMessages.php + Update the MediaWiki namespace after changing site language + + rebuildtextindex.php + Rebuild the fulltext search indices + + refreshLinks.php + Rebuild the link tables + + removeUnusedAccounts.php + Remove user accounts which have made no edits + + runJobs.php + Immediately complete all jobs in the job queue + + update.php + Check and upgrade the database schema to the current version
\ No newline at end of file diff --git a/maintenance/addwiki.php b/maintenance/addwiki.php new file mode 100644 index 00000000..253033a3 --- /dev/null +++ b/maintenance/addwiki.php @@ -0,0 +1,210 @@ +<?php + +$wgNoDBParam = true; + +require_once( "commandLine.inc" ); +require_once( "rebuildInterwiki.inc" ); +require_once( "languages/Names.php" ); +if ( count( $args ) != 3 ) { + wfDie( "Usage: php addwiki.php <language> <site> <dbname>\n" ); +} + +addWiki( $args[0], $args[1], $args[2] ); + +# ----------------------------------------------------------------- + +function addWiki( $lang, $site, $dbName ) +{ + global $IP, $wgLanguageNames, $wgDefaultExternalStore; + + $name = $wgLanguageNames[$lang]; + + $dbw =& wfGetDB( DB_WRITE ); + $common = "/home/wikipedia/common"; + $maintenance = "$IP/maintenance"; + + print "Creating database $dbName for $lang.$site\n"; + + # Set up the database + $dbw->query( "SET table_type=Innodb" ); + $dbw->query( "CREATE DATABASE $dbName" ); + $dbw->selectDB( $dbName ); + + print "Initialising tables\n"; + dbsource( "$maintenance/tables.sql", $dbw ); + dbsource( "$IP/extensions/OAI/update_table.sql", $dbw ); + $dbw->query( "INSERT INTO site_stats(ss_row_id) VALUES (1)" ); + + # Initialise external storage + if ( $wgDefaultExternalStore && preg_match( '!^DB://(.*)$!', $wgDefaultExternalStore, $m ) ) { + print "Initialising external storage...\n"; + require_once( 'ExternalStoreDB.php' ); + global $wgDBuser, $wgDBpassword, $wgExternalServers; + $cluster = $m[1]; + + # Hack + $wgExternalServers[$cluster][0]['user'] = $wgDBuser; + $wgExternalServers[$cluster][0]['password'] = $wgDBpassword; + + $store = new ExternalStoreDB; + $extdb =& $store->getMaster( $cluster ); + $extdb->query( "SET table_type=InnoDB" ); + $extdb->query( "CREATE DATABASE $dbName" ); + $extdb->selectDB( $dbName ); + dbsource( "$maintenance/storage/blobs.sql", $extdb ); + $extdb->immediateCommit(); + } + + $wgTitle = Title::newMainPage(); + $wgArticle = new Article( $wgTitle 
); + $ucsite = ucfirst( $site ); + + $wgArticle->insertNewArticle( " +==This subdomain is reserved for the creation of a $ucsite in '''[[:en:{$name}|{$name}]]''' language== + +If you can write in this language and want to collaborate in the creation of this encyclopedia then '''you''' can make it. + +Go ahead. Translate this page and start working on your encyclopedia. + +For help, see '''[[m:Help:How to start a new Wikipedia|how to start a new Wikipedia]]'''. + +==Sister projects== +[http://meta.wikipedia.org Meta-Wikipedia] | [http://www.wiktionary.org Wiktionary] | [http://www.wikibooks.org Wikibooks] | [http://www.wikinews.org Wikinews] | [http://www.wikiquote.org Wikiquote] | [http://www.wikisource.org Wikisource] + +See the [http://www.wikipedia.org Wikipedia portal] for other language Wikipedias. + +[[aa:]] +[[af:]] +[[als:]] +[[ar:]] +[[de:]] +[[en:]] +[[as:]] +[[ast:]] +[[ay:]] +[[az:]] +[[be:]] +[[bg:]] +[[bn:]] +[[bo:]] +[[bs:]] +[[cs:]] +[[co:]] +[[cs:]] +[[cy:]] +[[da:]] +[[el:]] +[[eo:]] +[[es:]] +[[et:]] +[[eu:]] +[[fa:]] +[[fi:]] +[[fr:]] +[[fy:]] +[[ga:]] +[[gl:]] +[[gn:]] +[[gu:]] +[[he:]] +[[hi:]] +[[hr:]] +[[hy:]] +[[ia:]] +[[id:]] +[[is:]] +[[it:]] +[[ja:]] +[[ka:]] +[[kk:]] +[[km:]] +[[kn:]] +[[ko:]] +[[ks:]] +[[ku:]] +[[ky:]] +[[la:]] +[[ln:]] +[[lo:]] +[[lt:]] +[[lv:]] +[[hu:]] +[[mi:]] +[[mk:]] +[[ml:]] +[[mn:]] +[[mr:]] +[[ms:]] +[[mt:]] +[[my:]] +[[na:]] +[[nah:]] +[[nds:]] +[[ne:]] +[[nl:]] +[[no:]] +[[oc:]] +[[om:]] +[[pa:]] +[[pl:]] +[[ps:]] +[[pt:]] +[[qu:]] +[[ro:]] +[[ru:]] +[[sa:]] +[[si:]] +[[sk:]] +[[sl:]] +[[sq:]] +[[sr:]] +[[sv:]] +[[sw:]] +[[ta:]] +[[te:]] +[[tg:]] +[[th:]] +[[tk:]] +[[tl:]] +[[tr:]] +[[tt:]] +[[ug:]] +[[uk:]] +[[ur:]] +[[uz:]] +[[vi:]] +[[vo:]] +[[xh:]] +[[yo:]] +[[za:]] +[[zh:]] +[[zu:]] +", '', false, false ); + + print "Adding to dblists\n"; + + # Add to dblist; if the list cannot be opened, warn and carry on so the + # remaining setup steps still run (fwrite/fclose must not be handed false) + $file = fopen( "$common/all.dblist", "a" ); + if ( $file ) { + fwrite( $file, "$dbName\n" ); + fclose( $file ); + } else { + print "Warning: unable to open $common/all.dblist for appending\n"; + } + + # Update the sublists + system("cd $common && 
./refresh-dblist"); + + print "Constructing interwiki SQL\n"; + # Rebuild interwiki tables + $sql = getRebuildInterwikiSQL(); + $tempname = tempnam( '/tmp', 'addwiki' ); + $file = fopen( $tempname, 'w' ); + if ( !$file ) { + wfDie( "Error, unable to open temporary file $tempname\n" ); + } + fwrite( $file, $sql ); + fclose( $file ); + print "Sourcing interwiki SQL\n"; + dbsource( $tempname, $dbw ); + unlink( $tempname ); + + print "Script ended. You now want to run sync-common-all to publish *dblist files (check them for duplicates first)\n"; +} +?> diff --git a/maintenance/alltrans.php b/maintenance/alltrans.php new file mode 100644 index 00000000..2fdc4499 --- /dev/null +++ b/maintenance/alltrans.php @@ -0,0 +1,11 @@ +<?php +/** + * @package MediaWiki + * @subpackage Maintenance + */ + +require_once('commandLine.inc'); + +foreach(array_keys($wgAllMessagesEn) as $key) + echo "$key\n"; +?> diff --git a/maintenance/apache-ampersand.diff b/maintenance/apache-ampersand.diff new file mode 100644 index 00000000..f281ce15 --- /dev/null +++ b/maintenance/apache-ampersand.diff @@ -0,0 +1,53 @@ +--- orig/apache_1.3.26/src/modules/standard/mod_rewrite.h Wed Mar 13 13:05:34 2002 ++++ apache_1.3.26/src/modules/standard/mod_rewrite.h Tue Oct 15 14:07:21 2002 +@@ -447,6 +447,7 @@ + static char *rewrite_mapfunc_toupper(request_rec *r, char *key); + static char *rewrite_mapfunc_tolower(request_rec *r, char *key); + static char *rewrite_mapfunc_escape(request_rec *r, char *key); ++static char *rewrite_mapfunc_ampescape(request_rec *r, char *key); + static char *rewrite_mapfunc_unescape(request_rec *r, char *key); + static char *select_random_value_part(request_rec *r, char *value); + static void rewrite_rand_init(void); +--- orig/apache_1.3.26/src/modules/standard/mod_rewrite.c Wed May 29 10:39:23 2002 ++++ apache_1.3.26/src/modules/standard/mod_rewrite.c Tue Oct 15 14:07:49 2002 +@@ -502,6 +502,9 @@ + else if (strcmp(a2+4, "unescape") == 0) { + new->func = rewrite_mapfunc_unescape; 
+ } ++ else if (strcmp(a2+4, "ampescape") == 0) { ++ new->func = rewrite_mapfunc_ampescape; ++ } + else if (sconf->state == ENGINE_ENABLED) { + return ap_pstrcat(cmd->pool, "RewriteMap: internal map not found:", + a2+4, NULL); +@@ -2982,6 +2985,30 @@ + + value = ap_escape_uri(r->pool, key); + return value; ++} ++ ++static char *rewrite_mapfunc_ampescape(request_rec *r, char *key) ++{ ++ /* We only need to escape the ampersand */ ++ char *copy = ap_palloc(r->pool, 3 * strlen(key) + 3); ++ const unsigned char *s = (const unsigned char *)key; ++ unsigned char *d = (unsigned char *)copy; ++ unsigned c; ++ ++ while ((c = *s)) { ++ if (c == '&') { ++ *d++ = '%'; ++ *d++ = '2'; ++ *d++ = '6'; ++ } ++ else { ++ *d++ = c; ++ } ++ ++s; ++ } ++ *d = '\0'; ++ ++ return copy; + } + + static char *rewrite_mapfunc_unescape(request_rec *r, char *key) diff --git a/maintenance/archives/.htaccess b/maintenance/archives/.htaccess new file mode 100644 index 00000000..3a428827 --- /dev/null +++ b/maintenance/archives/.htaccess @@ -0,0 +1 @@ +Deny from all diff --git a/maintenance/archives/patch-archive-rev_id.sql b/maintenance/archives/patch-archive-rev_id.sql new file mode 100644 index 00000000..375001b8 --- /dev/null +++ b/maintenance/archives/patch-archive-rev_id.sql @@ -0,0 +1,6 @@ +-- New field in archive table to preserve revision IDs across undeletion. +-- Added 2005-03-10 + +ALTER TABLE /*$wgDBprefix*/archive + ADD + ar_rev_id int(8) unsigned; diff --git a/maintenance/archives/patch-archive-text_id.sql b/maintenance/archives/patch-archive-text_id.sql new file mode 100644 index 00000000..f59715ff --- /dev/null +++ b/maintenance/archives/patch-archive-text_id.sql @@ -0,0 +1,14 @@ +-- New field in archive table to preserve text source IDs across undeletion. +-- +-- Older entries containing NULL in this field will contain text in the +-- ar_text and ar_flags fields, and will cause the (re)creation of a new +-- text record upon undeletion. 
+-- +-- Newer ones will reference a text.old_id with this field, and the existing +-- entries will be used as-is; only a revision record need be created. +-- +-- Added 2005-05-01 + +ALTER TABLE /*$wgDBprefix*/archive + ADD + ar_text_id int(8) unsigned; diff --git a/maintenance/archives/patch-bot.sql b/maintenance/archives/patch-bot.sql new file mode 100644 index 00000000..ce61884c --- /dev/null +++ b/maintenance/archives/patch-bot.sql @@ -0,0 +1,11 @@ +-- Add field to recentchanges for easy filtering of bot entries +-- edits by a user with 'bot' in user.user_rights should be +-- marked 1 in rc_bot. + +-- Change made 2002-12-15 by Brion VIBBER <brion@pobox.com> +-- this affects code in Article.php, User.php SpecialRecentchanges.php +-- column also added to buildTables.inc + +ALTER TABLE /*$wgDBprefix*/recentchanges + ADD COLUMN rc_bot tinyint(3) unsigned NOT NULL default '0' + AFTER rc_minor; diff --git a/maintenance/archives/patch-cache.sql b/maintenance/archives/patch-cache.sql new file mode 100644 index 00000000..5651c3ce --- /dev/null +++ b/maintenance/archives/patch-cache.sql @@ -0,0 +1,41 @@ +-- patch-cache.sql +-- 2003-03-22 <brion@pobox.com> +-- +-- Add 'last touched' fields to cur and user tables. +-- These are useful for maintaining cache consistency. +-- (Updates to OutputPage.php and elsewhere.) +-- +-- cur_touched should be set to the current time whenever: +-- * the page is updated +-- * a linked page is created +-- * a linked page is destroyed +-- +-- The cur_touched time will then be compared against the +-- timestamps of cached pages to ensure consistency; if +-- cur_touched is later, the page must be regenerated. 
+ +ALTER TABLE /*$wgDBprefix*/cur + ADD COLUMN cur_touched char(14) binary NOT NULL default ''; + +-- Existing pages should be initialized to the current +-- time so they don't needlessly rerender until they are +-- changed for the first time: + +UPDATE /*$wgDBprefix*/cur + SET cur_touched=NOW()+0; + +-- user_touched should be set to the current time whenever: +-- * the user logs in +-- * the user saves preferences (if no longer default...?) +-- * the user's newtalk status is altered +-- +-- The user_touched time should also be checked against the +-- timestamp reported by a browser requesting revalidation. +-- If user_touched is later than the reported last modified +-- time, the page should be rerendered with new options and +-- sent again. + +ALTER TABLE /*$wgDBprefix*/user + ADD COLUMN user_touched char(14) binary NOT NULL default ''; +UPDATE /*$wgDBprefix*/user + SET user_touched=NOW()+0; diff --git a/maintenance/archives/patch-categorylinks.sql b/maintenance/archives/patch-categorylinks.sql new file mode 100644 index 00000000..53c82fc0 --- /dev/null +++ b/maintenance/archives/patch-categorylinks.sql @@ -0,0 +1,39 @@ +-- +-- Track category inclusions *used inline* +-- This tracks a single level of category membership +-- (folksonomic tagging, really). +-- +CREATE TABLE /*$wgDBprefix*/categorylinks ( + -- Key to page_id of the page defined as a category member. + cl_from int(8) unsigned NOT NULL default '0', + + -- Name of the category. + -- This is also the page_title of the category's description page; + -- all such pages are in namespace 14 (NS_CATEGORY). + cl_to varchar(255) binary NOT NULL default '', + + -- The title of the linking page, or an optional override + -- to determine sort order. Sorting is by binary order, which + -- isn't always ideal, but collations seem to be an exciting + -- and dangerous new world in MySQL... 
+ -- + -- For MySQL 4.1+ with charset set to utf8, the sort key *index* + -- needs cut to be smaller than 1024 bytes (at 3 bytes per char). + -- To sort properly on the shorter key, this field needs to be + -- the same shortness. + cl_sortkey varchar(86) binary NOT NULL default '', + + -- This isn't really used at present. Provided for an optional + -- sorting method by approximate addition time. + cl_timestamp timestamp NOT NULL, + + UNIQUE KEY cl_from(cl_from,cl_to), + + -- This key is trouble. It's incomplete, AND it's too big + -- when collation is set to UTF-8. Bleeeacch! + KEY cl_sortkey(cl_to,cl_sortkey), + + -- Not really used? + KEY cl_timestamp(cl_to,cl_timestamp) + +) TYPE=InnoDB; diff --git a/maintenance/archives/patch-drop-user_newtalk.sql b/maintenance/archives/patch-drop-user_newtalk.sql new file mode 100644 index 00000000..6ec84fb3 --- /dev/null +++ b/maintenance/archives/patch-drop-user_newtalk.sql @@ -0,0 +1,3 @@ +-- Patch for email authentication T.Gries/M.Arndt 27.11.2004 +-- Table user_newtalk is dropped, as the table watchlist is now also used for storing user_talk-page notifications +DROP TABLE /*$wgDBprefix*/user_newtalk; diff --git a/maintenance/archives/patch-drop_img_type.sql b/maintenance/archives/patch-drop_img_type.sql new file mode 100644 index 00000000..e3737617 --- /dev/null +++ b/maintenance/archives/patch-drop_img_type.sql @@ -0,0 +1,3 @@ +-- img_type is no longer used, delete it + +ALTER TABLE /*$wgDBprefix*/image DROP COLUMN img_type; diff --git a/maintenance/archives/patch-email-authentication.sql b/maintenance/archives/patch-email-authentication.sql new file mode 100644 index 00000000..b35b10f1 --- /dev/null +++ b/maintenance/archives/patch-email-authentication.sql @@ -0,0 +1,3 @@ +-- Added early in 1.5 alpha development, removed 2005-04-25 + +ALTER TABLE /*$wgDBprefix*/user DROP COLUMN user_emailauthenticationtimestamp; diff --git a/maintenance/archives/patch-email-notification.sql 
b/maintenance/archives/patch-email-notification.sql new file mode 100644 index 00000000..f9bc0440 --- /dev/null +++ b/maintenance/archives/patch-email-notification.sql @@ -0,0 +1,11 @@ +-- Patch for email notification on page changes T.Gries/M.Arndt 11.09.2004 + +-- A new column 'wl_notificationtimestamp' is added to the table 'watchlist'. +-- When a page watched by a user X is changed by someone else, an email is sent to the watching user X +-- if and only if the field 'wl_notificationtimestamp' is '0'. The time/date of sending the mail is then stored in that field. +-- Further pages changes do not trigger new notification mails as long as user X has not re-visited that page. +-- The field is reset to '0' when user X re-visits the page or when he or she resets all notification timestamps +-- ("notification flags") at once by clicking the new button on his/her watchlist page. +-- T. Gries/M. Arndt 11.09.2004 - December 2004 + +ALTER TABLE /*$wgDBprefix*/watchlist ADD (wl_notificationtimestamp varchar(14) binary); diff --git a/maintenance/archives/patch-externallinks.sql b/maintenance/archives/patch-externallinks.sql new file mode 100644 index 00000000..d1aa5764 --- /dev/null +++ b/maintenance/archives/patch-externallinks.sql @@ -0,0 +1,13 @@ +-- +-- Track links to external URLs +-- +CREATE TABLE /*$wgDBprefix*/externallinks ( + el_from int(8) unsigned NOT NULL default '0', + el_to blob NOT NULL default '', + el_index blob NOT NULL default '', + + KEY (el_from, el_to(40)), + KEY (el_to(60), el_from), + KEY (el_index(60)) +) TYPE=InnoDB; + diff --git a/maintenance/archives/patch-filearchive.sql b/maintenance/archives/patch-filearchive.sql new file mode 100644 index 00000000..4bf09366 --- /dev/null +++ b/maintenance/archives/patch-filearchive.sql @@ -0,0 +1,51 @@ +-- +-- Record of deleted file data +-- +CREATE TABLE /*$wgDBprefix*/filearchive ( + -- Unique row id + fa_id int not null auto_increment, + + -- Original base filename; key to image.img_name, 
page.page_title, etc + fa_name varchar(255) binary NOT NULL default '', + + -- Filename of archived file, if an old revision + fa_archive_name varchar(255) binary default '', + + -- Which storage bin (directory tree or object store) the file data + -- is stored in. Should be 'deleted' for files that have been deleted; + -- any other bin is not yet in use. + fa_storage_group varchar(16), + + -- SHA-1 of the file contents plus extension, used as a key for storage. + -- eg 8f8a562add37052a1848ff7771a2c515db94baa9.jpg + -- + -- If NULL, the file was missing at deletion time or has been purged + -- from the archival storage. + fa_storage_key varchar(64) binary default '', + + -- Deletion information, if this file is deleted. + fa_deleted_user int, + fa_deleted_timestamp char(14) binary default '', + fa_deleted_reason text, + + -- Duped fields from image + fa_size int(8) unsigned default '0', + fa_width int(5) default '0', + fa_height int(5) default '0', + fa_metadata mediumblob, + fa_bits int(3) default '0', + fa_media_type ENUM("UNKNOWN", "BITMAP", "DRAWING", "AUDIO", "VIDEO", "MULTIMEDIA", "OFFICE", "TEXT", "EXECUTABLE", "ARCHIVE") default NULL, + fa_major_mime ENUM("unknown", "application", "audio", "image", "text", "video", "message", "model", "multipart") default "unknown", + fa_minor_mime varchar(32) default "unknown", + fa_description tinyblob default '', + fa_user int(5) unsigned default '0', + fa_user_text varchar(255) binary default '', + fa_timestamp char(14) binary default '', + + PRIMARY KEY (fa_id), + INDEX (fa_name, fa_timestamp), -- pick out by image name + INDEX (fa_storage_group, fa_storage_key), -- pick out dupe files + INDEX (fa_deleted_timestamp), -- sort by deletion time + INDEX (fa_deleted_user) -- sort by deleter + +) TYPE=InnoDB; diff --git a/maintenance/archives/patch-hitcounter.sql b/maintenance/archives/patch-hitcounter.sql new file mode 100644 index 00000000..260f717f --- /dev/null +++ b/maintenance/archives/patch-hitcounter.sql @@ -0,0 +1,9 
@@ +-- +-- hitcounter table is used to buffer page hits before they are periodically +-- counted and added to the cur_counter column in the cur table. +-- December 2003 +-- + +CREATE TABLE /*$wgDBprefix*/hitcounter ( + hc_id INTEGER UNSIGNED NOT NULL +) TYPE=HEAP MAX_ROWS=25000; diff --git a/maintenance/archives/patch-image_name_primary.sql b/maintenance/archives/patch-image_name_primary.sql new file mode 100644 index 00000000..5bd88264 --- /dev/null +++ b/maintenance/archives/patch-image_name_primary.sql @@ -0,0 +1,6 @@ +-- Make the image name index unique + +ALTER TABLE /*$wgDBprefix*/image DROP INDEX img_name; + +ALTER TABLE /*$wgDBprefix*/image + ADD PRIMARY KEY img_name (img_name); diff --git a/maintenance/archives/patch-image_name_unique.sql b/maintenance/archives/patch-image_name_unique.sql new file mode 100644 index 00000000..5cf02d41 --- /dev/null +++ b/maintenance/archives/patch-image_name_unique.sql @@ -0,0 +1,6 @@ +-- Make the image name index unique + +ALTER TABLE /*$wgDBprefix*/image DROP INDEX img_name; + +ALTER TABLE /*$wgDBprefix*/image + ADD UNIQUE INDEX img_name (img_name); diff --git a/maintenance/archives/patch-img_exif.sql b/maintenance/archives/patch-img_exif.sql new file mode 100644 index 00000000..2fd78f76 --- /dev/null +++ b/maintenance/archives/patch-img_exif.sql @@ -0,0 +1,3 @@ +-- Extra image exif metadata, added for 1.5 but quickly removed. + +ALTER TABLE /*$wgDBprefix*/image DROP img_exif; diff --git a/maintenance/archives/patch-img_media_type.sql b/maintenance/archives/patch-img_media_type.sql new file mode 100644 index 00000000..2356fc63 --- /dev/null +++ b/maintenance/archives/patch-img_media_type.sql @@ -0,0 +1,17 @@ +-- media type columns, added for 1.5 +-- this alters the scheme for 1.5, img_type is no longer used. 
+ +ALTER TABLE /*$wgDBprefix*/image ADD ( + -- Media type as defined by the MEDIATYPE_xxx constants + img_media_type ENUM("UNKNOWN", "BITMAP", "DRAWING", "AUDIO", "VIDEO", "MULTIMEDIA", "OFFICE", "TEXT", "EXECUTABLE", "ARCHIVE") default NULL, + + -- major part of a MIME media type as defined by IANA + -- see http://www.iana.org/assignments/media-types/ + img_major_mime ENUM("unknown", "application", "audio", "image", "text", "video", "message", "model", "multipart") NOT NULL default "unknown", + + -- minor part of a MIME media type as defined by IANA + -- the minor parts are not required to adhere to any standard + -- but should be consistent throughout the database + -- see http://www.iana.org/assignments/media-types/ + img_minor_mime varchar(32) NOT NULL default "unknown" +); diff --git a/maintenance/archives/patch-img_metadata.sql b/maintenance/archives/patch-img_metadata.sql new file mode 100644 index 00000000..407e4325 --- /dev/null +++ b/maintenance/archives/patch-img_metadata.sql @@ -0,0 +1,6 @@ +-- Moving img_exif to img_metadata, so the name won't be so confusing when we +-- Use it for Ogg metadata or something like that. 
+ +ALTER TABLE /*$wgDBprefix*/image ADD ( + img_metadata mediumblob NOT NULL +); diff --git a/maintenance/archives/patch-img_width.sql b/maintenance/archives/patch-img_width.sql new file mode 100644 index 00000000..c99bd46d --- /dev/null +++ b/maintenance/archives/patch-img_width.sql @@ -0,0 +1,18 @@ +-- Extra image metadata, added for 1.5 + +-- NOTE: as per patch-img_media_type.sql, the img_type +-- column is no longer used and has therefore been removed from this patch + +ALTER TABLE /*$wgDBprefix*/image ADD ( + img_width int(5) NOT NULL default 0, + img_height int(5) NOT NULL default 0, + img_bits int(5) NOT NULL default 0 +); + +ALTER TABLE /*$wgDBprefix*/oldimage ADD ( + oi_width int(5) NOT NULL default 0, + oi_height int(5) NOT NULL default 0, + oi_bits int(3) NOT NULL default 0 +); + + diff --git a/maintenance/archives/patch-indexes.sql b/maintenance/archives/patch-indexes.sql new file mode 100644 index 00000000..23eec07d --- /dev/null +++ b/maintenance/archives/patch-indexes.sql @@ -0,0 +1,24 @@ +-- +-- patch-indexes.sql +-- +-- Fix up table indexes; new to stable release in November 2003 +-- + +ALTER TABLE /*$wgDBprefix*/links + DROP INDEX l_from, + ADD INDEX l_from (l_from); + +ALTER TABLE /*$wgDBprefix*/brokenlinks + DROP INDEX bl_to, + ADD INDEX bl_to (bL_to); + +ALTER TABLE /*$wgDBprefix*/recentchanges + ADD INDEX rc_timestamp (rc_timestamp), + ADD INDEX rc_namespace_title (rc_namespace, rc_title), + ADD INDEX rc_cur_id (rc_cur_id); + +ALTER TABLE /*$wgDBprefix*/archive + ADD KEY name_title_timestamp (ar_namespace,ar_title,ar_timestamp); + +ALTER TABLE /*$wgDBprefix*/watchlist + ADD KEY namespace_title (wl_namespace,wl_title); diff --git a/maintenance/archives/patch-interwiki-trans.sql b/maintenance/archives/patch-interwiki-trans.sql new file mode 100644 index 00000000..2384a66a --- /dev/null +++ b/maintenance/archives/patch-interwiki-trans.sql @@ -0,0 +1,2 @@ +ALTER TABLE /*$wgDBprefix*/interwiki + ADD COLUMN iw_trans TINYINT(1) NOT NULL DEFAULT 0; diff 
--git a/maintenance/archives/patch-interwiki.sql b/maintenance/archives/patch-interwiki.sql new file mode 100644 index 00000000..90b162ef --- /dev/null +++ b/maintenance/archives/patch-interwiki.sql @@ -0,0 +1,20 @@ +-- Creates interwiki prefix<->url mapping table +-- used from 2003-08-21 dev version. +-- Import the default mappings from maintenance/interwiki.sql + +CREATE TABLE /*$wgDBprefix*/interwiki ( + -- The interwiki prefix, (e.g. "Meatball", or the language prefix "de") + iw_prefix char(32) NOT NULL, + + -- The URL of the wiki, with "$1" as a placeholder for an article name. + -- Any spaces in the name will be transformed to underscores before + -- insertion. + iw_url char(127) NOT NULL, + + -- A boolean value indicating whether the wiki is in this project + -- (used, for example, to detect redirect loops) + iw_local BOOL NOT NULL, + + UNIQUE KEY iw_prefix (iw_prefix) + +) TYPE=InnoDB; diff --git a/maintenance/archives/patch-inverse_timestamp.sql b/maintenance/archives/patch-inverse_timestamp.sql new file mode 100644 index 00000000..0f7d66f1 --- /dev/null +++ b/maintenance/archives/patch-inverse_timestamp.sql @@ -0,0 +1,15 @@ +-- Removes the inverse_timestamp field from early 1.5 alphas. +-- This field was used in the olden days as a crutch for sorting +-- limitations in MySQL 3.x, but is being dropped now as an +-- unnecessary burden. Serious wikis should be running on 4.x. 
+-- +-- Updater added 2005-03-13 + +ALTER TABLE /*$wgDBprefix*/revision + DROP COLUMN inverse_timestamp, + DROP INDEX page_timestamp, + DROP INDEX user_timestamp, + DROP INDEX usertext_timestamp, + ADD INDEX page_timestamp (rev_page,rev_timestamp), + ADD INDEX user_timestamp (rev_user,rev_timestamp), + ADD INDEX usertext_timestamp (rev_user_text,rev_timestamp); diff --git a/maintenance/archives/patch-ipb_expiry.sql b/maintenance/archives/patch-ipb_expiry.sql new file mode 100644 index 00000000..0f106d70 --- /dev/null +++ b/maintenance/archives/patch-ipb_expiry.sql @@ -0,0 +1,8 @@ +-- Adds the ipb_expiry field to ipblocks + +ALTER TABLE /*$wgDBprefix*/ipblocks ADD ipb_expiry char(14) binary NOT NULL default ''; + +-- All IP blocks have one day expiry +UPDATE /*$wgDBprefix*/ipblocks SET ipb_expiry = date_format(date_add(ipb_timestamp,INTERVAL 1 DAY),"%Y%m%d%H%i%s") WHERE ipb_user = 0; + +-- Null string is fine for user blocks, since this indicates infinity diff --git a/maintenance/archives/patch-ipb_range_start.sql b/maintenance/archives/patch-ipb_range_start.sql new file mode 100644 index 00000000..c31e2d9c --- /dev/null +++ b/maintenance/archives/patch-ipb_range_start.sql @@ -0,0 +1,25 @@ +-- Add the range handling fields +ALTER TABLE /*$wgDBprefix*/ipblocks + ADD ipb_range_start varchar(32) NOT NULL default '', + ADD ipb_range_end varchar(32) NOT NULL default '', + ADD INDEX ipb_range (ipb_range_start(8), ipb_range_end(8)); + + +-- Initialise fields +-- Only range blocks match ipb_address LIKE '%/%', this fact is used in the code already +UPDATE /*$wgDBprefix*/ipblocks + SET + ipb_range_start = LPAD(HEX( + (SUBSTRING_INDEX(ipb_address, '.', 1) << 24) + + (SUBSTRING_INDEX(SUBSTRING_INDEX(ipb_address, '.', 2), '.', -1) << 16) + + (SUBSTRING_INDEX(SUBSTRING_INDEX(ipb_address, '.', 3), '.', -1) << 8) + + (SUBSTRING_INDEX(SUBSTRING_INDEX(ipb_address, '/', 1), '.', -1)) ), 8, '0' ), + + ipb_range_end = LPAD(HEX( + (SUBSTRING_INDEX(ipb_address, '.', 1) << 24) + + 
(SUBSTRING_INDEX(SUBSTRING_INDEX(ipb_address, '.', 2), '.', -1) << 16) + + (SUBSTRING_INDEX(SUBSTRING_INDEX(ipb_address, '.', 3), '.', -1) << 8) + + (SUBSTRING_INDEX(SUBSTRING_INDEX(ipb_address, '/', 1), '.', -1)) + + ((1 << (32 - SUBSTRING_INDEX(ipb_address, '/', -1))) - 1) ), 8, '0' ) + + WHERE ipb_address LIKE '%/%'; diff --git a/maintenance/archives/patch-ipblocks.sql b/maintenance/archives/patch-ipblocks.sql new file mode 100644 index 00000000..8e47798b --- /dev/null +++ b/maintenance/archives/patch-ipblocks.sql @@ -0,0 +1,6 @@ +-- For auto-expiring blocks -- + +ALTER TABLE /*$wgDBprefix*/ipblocks + ADD ipb_auto tinyint(1) NOT NULL default '0', + ADD ipb_id int(8) NOT NULL auto_increment, + ADD PRIMARY KEY (ipb_id); diff --git a/maintenance/archives/patch-job.sql b/maintenance/archives/patch-job.sql new file mode 100644 index 00000000..89918456 --- /dev/null +++ b/maintenance/archives/patch-job.sql @@ -0,0 +1,20 @@ + +-- Jobs performed by parallel apache threads or a command-line daemon +CREATE TABLE /*$wgDBprefix*/job ( + job_id int(9) unsigned NOT NULL auto_increment, + + -- Command name, currently only refreshLinks is defined + job_cmd varchar(255) NOT NULL default '', + + -- Namespace and title to act on + -- Should be 0 and '' if the command does not operate on a title + job_namespace int NOT NULL, + job_title varchar(255) binary NOT NULL, + + -- Any other parameters to the command + -- Presently unused, format undefined + job_params blob NOT NULL default '', + + PRIMARY KEY job_id (job_id), + KEY (job_cmd, job_namespace, job_title) +) TYPE=InnoDB; diff --git a/maintenance/archives/patch-langlinks.sql b/maintenance/archives/patch-langlinks.sql new file mode 100644 index 00000000..9c3b7e54 --- /dev/null +++ b/maintenance/archives/patch-langlinks.sql @@ -0,0 +1,14 @@ +CREATE TABLE /*$wgDBprefix*/langlinks ( + -- page_id of the referring page + ll_from int(8) unsigned NOT NULL default '0', + + -- Language code of the target + ll_lang varchar(10) binary NOT 
NULL default '', + + -- Title of the target, including namespace + ll_title varchar(255) binary NOT NULL default '', + + UNIQUE KEY (ll_from, ll_lang), + KEY (ll_lang, ll_title) +) TYPE=InnoDB; + diff --git a/maintenance/archives/patch-linkscc-1.3.sql b/maintenance/archives/patch-linkscc-1.3.sql new file mode 100644 index 00000000..e397fcb9 --- /dev/null +++ b/maintenance/archives/patch-linkscc-1.3.sql @@ -0,0 +1,6 @@ +-- +-- linkscc table used to cache link lists in easier to digest form. +-- New schema for 1.3 - removes old lcc_title column. +-- May 2004 +-- +ALTER TABLE /*$wgDBprefix*/linkscc DROP COLUMN lcc_title;
\ No newline at end of file diff --git a/maintenance/archives/patch-linkscc.sql b/maintenance/archives/patch-linkscc.sql new file mode 100644 index 00000000..91d4da56 --- /dev/null +++ b/maintenance/archives/patch-linkscc.sql @@ -0,0 +1,12 @@ +-- +-- linkscc table used to cache link lists in easier to digest form +-- November 2003 +-- +-- Format later updated. +-- + +CREATE TABLE /*$wgDBprefix*/linkscc ( + lcc_pageid INT UNSIGNED NOT NULL UNIQUE KEY, + lcc_cacheobj MEDIUMBLOB NOT NULL + +) TYPE=InnoDB; diff --git a/maintenance/archives/patch-linktables.sql b/maintenance/archives/patch-linktables.sql new file mode 100644 index 00000000..bb9bd033 --- /dev/null +++ b/maintenance/archives/patch-linktables.sql @@ -0,0 +1,70 @@ +-- +-- Track links that do exist +-- l_from and l_to key to cur_id +-- +DROP TABLE IF EXISTS /*$wgDBprefix*/links; +CREATE TABLE /*$wgDBprefix*/links ( + -- Key to the page_id of the page containing the link. + l_from int(8) unsigned NOT NULL default '0', + + -- Key to the page_id of the link target. + -- An unfortunate consequence of this is that rename + -- operations require changing the links entries for + -- all links to the moved page. + l_to int(8) unsigned NOT NULL default '0', + + UNIQUE KEY l_from(l_from,l_to), + KEY (l_to) + +) TYPE=InnoDB; + +-- +-- Track links to pages that don't yet exist. +-- bl_from keys to cur_id +-- bl_to is a text link (namespace:title) +-- +DROP TABLE IF EXISTS /*$wgDBprefix*/brokenlinks; +CREATE TABLE /*$wgDBprefix*/brokenlinks ( + -- Key to the page_id of the page containing the link. + bl_from int(8) unsigned NOT NULL default '0', + + -- Text of the target page title ("namespace:title"). + -- Unfortunately this doesn't split the namespace index + -- key and therefore can't easily be joined to anything. 
+ bl_to varchar(255) binary NOT NULL default '', + UNIQUE KEY bl_from(bl_from,bl_to), + KEY (bl_to) + +) TYPE=InnoDB; + +-- +-- Track links to images *used inline* +-- il_from keys to cur_id, il_to keys to image_name. +-- We don't distinguish live from broken links. +-- +DROP TABLE IF EXISTS /*$wgDBprefix*/imagelinks; +CREATE TABLE /*$wgDBprefix*/imagelinks ( + -- Key to page_id of the page containing the image / media link. + il_from int(8) unsigned NOT NULL default '0', + + -- Filename of target image. + -- This is also the page_title of the file's description page; + -- all such pages are in namespace 6 (NS_IMAGE). + il_to varchar(255) binary NOT NULL default '', + + UNIQUE KEY il_from(il_from,il_to), + KEY (il_to) + +) TYPE=InnoDB; + +-- +-- Stores (possibly gzipped) serialized objects with +-- cache arrays to reduce database load slurping up +-- from links and brokenlinks. +-- +DROP TABLE IF EXISTS /*$wgDBprefix*/linkscc; +CREATE TABLE /*$wgDBprefix*/linkscc ( + lcc_pageid INT UNSIGNED NOT NULL UNIQUE KEY, + lcc_cacheobj MEDIUMBLOB NOT NULL + +) TYPE=InnoDB; diff --git a/maintenance/archives/patch-list.txt b/maintenance/archives/patch-list.txt new file mode 100644 index 00000000..93a63bfd --- /dev/null +++ b/maintenance/archives/patch-list.txt @@ -0,0 +1,182 @@ +List of database patches and upgrades as the PediaWiki software evolves... + +* 2002-11-23: Search index format changed for UTF-8 wikis +For wikis using the UTF-8 languages, the search index entries +need to be rebuild to allow searching to work. (Other wikis +that have been run through the old phase2->phase3 conversion +script should also be reindexed to catch apostrophe misplacement.) + +Run rebuildIndex.php on your wiki. + + + +* 2002-11-27: Watchlist format changed +Converts the user_watchlist entries out to a separate table which +links user_id<->cur_id and can be more handily queried. + +Run upgradeWatchlist.php on your wiki. 
+ + + +* 2002-12-14: Recentchanges table bot/hidden column +Adds a column to indicate changes by registered bots (or perhaps +later other admin actions) that should be hidden from the default +Recentchanges list because people think they're tedious, but should +still be available in article histories, contribs lists, and +power-user RC lists. + +Run bot.sql against your database. + + + +* 2002-12-17: Watchlist format changed again +Now using namespace, title instead of cur_id. This can track deleted/ +recreated pages better, makes it easier to handle talk pages (now with +the auto-watch feature there's a lot more watching of talk pages!) +and whatnot. + +Run patch-watchlist.sql against your database. If all is well, drop +the oldwatchlist table which is no longer needed. (Note that this update +also drops the vestigial user_watchlist column.) + + + +* 2002-12-26: TeX math rendering adds 'math' table +A new 'math' table is used to cache TeX sections. + +Run patch-math.sql against your database, and add 'tmp' and 'math' +subdirectories to your tree alongside the upload directory, and copy +the 'math' source subdirectory under the wiki's PHP directory and run +"make" to compile the texvc evaluator. (whew!) + +TeX support requires TeX, OCaml, and ImageMagick. If you don't want +to use TeX support on your wiki, you can globally disable it by +setting $wgUseTeX=false in LocalSettings.php. + + + +* 2003-01-25: searchindex table +A new 'searchindex' table separates the fulltext index fields from +'cur'. This enables use of InnoDB tables, which don't support fulltext +search, for the main data, and will keep junk out of the backup dumps. + +Run patch-searchindex.sql on the database. If you wish to change table +tables on the others, use 'alter table' manually. (See MySQL docs.) + + +* 2003-01-24: Talk pages for anonymous users +A new table user_newtalk contains a list of talk pages that were +changed, both pages by anonymous and those by non-anonymous users. 
+ +Run patch-usernewtalk.sql if your database was created before +this date. + + +* 2003-02-02: Math table changed +Rerun patch-math.sql to recreate it. + +* 2003-02-03: Index added to USER table for performance reasons. Run +patch-userindex.sql to create it. + + +* 2003-02-09: Random table & inverse timestamps +The random page queue table has been removed in favor of a column +in the cur table. This eliminates the ssllooww queue refill step; +pre-storing random indices in an indexed column means we can do the +random sort instantly; each element is re-randomized upon selection. + +Also, an inverse_timestamp field has been added to the cur and old +tables. This will allow fast index-based sorting in history lists, +user contribs, linked recentchanges, etc with MySQL 3, which doesn't +allow DESC ordering on an indexed field. This may be removed later +when MySQL is found to be stable. + + +* 2003-03-22: Last touched fields for caching +'Last touched' timestamp fields have been added to the cur and user +tables to aid in maintaining cache consistency. Web clients will +be forced to reload a page if it has been touched since the client's +cached copy (this will catch indirect changes like creation of +linked pages) or if a user changes preferences or logs in anew (so +visual changes and login status are taken into account). + +Run patch-cache.sql on the database to set these fields up. This is +required for changes to OutputPage.php and elsewhere to continue +working on an older database. + + +* 2003-05-23: Index for "Oldest articles" +"Oldest articles" needs an index on namespace, redirect and timestamp +to be reasonably fast. (patch-oldestindex.sql) + +OutputPage.php User.php maintenance/buildTables.inc maintenance/patch-cache.sql maintenance/patch-list.txt + +* 2003-09: Ipblocks auto-expiry update +patch-ipblocks.sql + +* Interwiki URL table +Moves the interwiki prefix<->url mapping table from a static array +into the database. 
If you've got a custom table, be sure to make +your changes! + +Run patch-interwiki.sql to create the interwiki table, then the +plain interwiki.sql to load up the default set of mappings. + +* 2003-05-30: File upload license fields +Adds fields to 'image' table. +INCOMPLETE, DO NOT USE + + +* 2003-08-21: Interwiki URL table +Moves the interwiki prefix<->url mapping table from a static array +into the database. If you've got a custom table, be sure to make +your changes! + +Run patch-interwiki.sql to create the interwiki table, then the +plain interwiki.sql to load up the default set of mappings. + +* 2003-09: Ipblocks auto-expiry update +patch-ipblocks.sql + +* Interwiki URL table +Moves the interwiki prefix<->url mapping table from a static array +into the database. If you've got a custom table, be sure to make +your changes! + +Run patch-interwiki.sql to create the interwiki table, then the +plain interwiki.sql to load up the default set of mappings. + +* 2003-11: Indexes +Fixes up indexes on links, brokenlinks, recentchanges, watchlist, +and archive tables to boost speed. + +Run patch-indexes.sql. + +* 2003-11: linkscc table creation +patch-linkscc.sql + + +* 2004-01-25: recentchanges additional index +Adds an index to recentchanges to optimize Special:Newpages +patch-rc-newindex.sql + +* 2004-02-14: Adds the ipb_expiry field to ipblocks +patch-ipb_expiry.sql + + +* 2004-03-11: Recreate links tables to avoid duplicating titles +everywhere. **Rebuild your links after this with refreshLinks.php** + +patch-linktables.sql + + +* 2004-04: Add user_real_name field +patch-user-realname.sql + +* 2004-05-08: Add querycache table for caching special pages and generic + object cache to cover some slow operations w/o memcached. 
+patch-querycache.sql +patch-objectcache.sql + +* 2004-05-14: Add categorylinks table for handling category membership +patch-categorylinks.sql diff --git a/maintenance/archives/patch-log_params.sql b/maintenance/archives/patch-log_params.sql new file mode 100644 index 00000000..aa00a673 --- /dev/null +++ b/maintenance/archives/patch-log_params.sql @@ -0,0 +1 @@ +ALTER TABLE /*$wgDBprefix*/logging ADD log_params blob NOT NULL default ''; diff --git a/maintenance/archives/patch-logging-times-index.sql b/maintenance/archives/patch-logging-times-index.sql new file mode 100644 index 00000000..e66ceec4 --- /dev/null +++ b/maintenance/archives/patch-logging-times-index.sql @@ -0,0 +1,9 @@ +-- +-- patch-logging-times-index.sql +-- +-- Add a very humble index on logging times +-- + +ALTER TABLE /*$wgDBprefix*/logging + ADD INDEX times (log_timestamp); + diff --git a/maintenance/archives/patch-logging-title.sql b/maintenance/archives/patch-logging-title.sql new file mode 100644 index 00000000..c5da0dc0 --- /dev/null +++ b/maintenance/archives/patch-logging-title.sql @@ -0,0 +1,6 @@ +-- 1.4 betas were missing the 'binary' marker from logging.log_title, +-- which causes a collation mismatch error on joins in MySQL 4.1. + +ALTER TABLE /*$wgDBprefix*/logging + CHANGE COLUMN log_title + log_title varchar(255) binary NOT NULL default ''; diff --git a/maintenance/archives/patch-logging.sql b/maintenance/archives/patch-logging.sql new file mode 100644 index 00000000..79bb53b5 --- /dev/null +++ b/maintenance/archives/patch-logging.sql @@ -0,0 +1,37 @@ +-- Add the logging table and adjust recentchanges to accomodate special pages +-- 2004-08-24 + +CREATE TABLE /*$wgDBprefix*/logging ( + -- Symbolic keys for the general log type and the action type + -- within the log. The output format will be controlled by the + -- action field, but only the type controls categorization. + log_type char(10) NOT NULL default '', + log_action char(10) NOT NULL default '', + + -- Timestamp. Duh. 
+ log_timestamp char(14) NOT NULL default '19700101000000', + + -- The user who performed this action; key to user_id + log_user int unsigned NOT NULL default 0, + + -- Key to the page affected. Where a user is the target, + -- this will point to the user page. + log_namespace int NOT NULL default 0, + log_title varchar(255) binary NOT NULL default '', + + -- Freeform text. Interpreted as edit history comments. + log_comment varchar(255) NOT NULL default '', + + -- LF separated list of miscellaneous parameters + log_params blob NOT NULL default '', + + KEY type_time (log_type, log_timestamp), + KEY user_time (log_user, log_timestamp), + KEY page_time (log_namespace, log_title, log_timestamp) + +) TYPE=InnoDB; + + +-- Change from unsigned to signed so we can store special pages +ALTER TABLE /*$wgDBprefix*/recentchanges + MODIFY rc_namespace tinyint(3) NOT NULL default '0'; diff --git a/maintenance/archives/patch-math.sql b/maintenance/archives/patch-math.sql new file mode 100644 index 00000000..aee24a8a --- /dev/null +++ b/maintenance/archives/patch-math.sql @@ -0,0 +1,28 @@ +-- Creates table math used for caching TeX blocks. Needs to be run +-- on old installations when adding TeX support (2002-12-26) +-- Or, TeX can be disabled via $wgUseTeX=false in LocalSettings.php + +-- Note: math table has changed, and this script needs to be run again +-- to create it. (2003-02-02) + +DROP TABLE IF EXISTS /*$wgDBprefix*/math; +CREATE TABLE /*$wgDBprefix*/math ( + -- Binary MD5 hash of the latex fragment, used as an identifier key. + math_inputhash varchar(16) NOT NULL, + + -- Not sure what this is, exactly... + math_outputhash varchar(16) NOT NULL, + + -- texvc reports how well it thinks the HTML conversion worked; + -- if it's a low level the PNG rendering may be preferred. 
+ math_html_conservativeness tinyint(1) NOT NULL, + + -- HTML output from texvc, if any + math_html text, + + -- MathML output from texvc, if any + math_mathml text, + + UNIQUE KEY math_inputhash (math_inputhash) + +) TYPE=InnoDB; diff --git a/maintenance/archives/patch-mimesearch-indexes.sql b/maintenance/archives/patch-mimesearch-indexes.sql new file mode 100644 index 00000000..bd348c46 --- /dev/null +++ b/maintenance/archives/patch-mimesearch-indexes.sql @@ -0,0 +1,22 @@ +-- Add indexes to the mime types in image for use on Special:MIMEsearch, +-- changes a query like +-- +-- SELECT img_name FROM image WHERE img_major_mime = "image" AND img_minor_mime = "svg"; +-- from: +-- +-------+------+---------------+------+---------+------+------+-------------+ +-- | table | type | possible_keys | key | key_len | ref | rows | Extra | +-- +-------+------+---------------+------+---------+------+------+-------------+ +-- | image | ALL | NULL | NULL | NULL | NULL | 194 | Using where | +-- +-------+------+---------------+------+---------+------+------+-------------+ +-- to: +-- +-------+------+-------------------------------+----------------+---------+-------+------+-------------+ +-- | table | type | possible_keys | key | key_len | ref | rows | Extra | +-- +-------+------+-------------------------------+----------------+---------+-------+------+-------------+ +-- | image | ref | img_major_mime,img_minor_mime | img_minor_mime | 32 | const | 4 | Using where | +-- +-------+------+-------------------------------+----------------+---------+-------+------+-------------+ + +ALTER TABLE /*$wgDBprefix*/image + ADD INDEX img_major_mime (img_major_mime); +ALTER TABLE /*$wgDBprefix*/image + ADD INDEX img_minor_mime (img_minor_mime); + diff --git a/maintenance/archives/patch-objectcache.sql b/maintenance/archives/patch-objectcache.sql new file mode 100644 index 00000000..18572aa0 --- /dev/null +++ b/maintenance/archives/patch-objectcache.sql @@ -0,0 +1,9 @@ +-- For a few generic cache 
operations if not using Memcached +CREATE TABLE /*$wgDBprefix*/objectcache ( + keyname char(255) binary not null default '', + value mediumblob, + exptime datetime, + unique key (keyname), + key (exptime) + +) TYPE=InnoDB; diff --git a/maintenance/archives/patch-oldestindex.sql b/maintenance/archives/patch-oldestindex.sql new file mode 100644 index 00000000..930214fd --- /dev/null +++ b/maintenance/archives/patch-oldestindex.sql @@ -0,0 +1,5 @@ +-- Add index for "Oldest articles" (Special:Ancientpages) +-- 2003-05-23 Erik Moeller <moeller@scireview.de> + +ALTER TABLE /*$wgDBprefix*/cur + ADD INDEX namespace_redirect_timestamp(cur_namespace,cur_is_redirect,cur_timestamp); diff --git a/maintenance/archives/patch-page_len.sql b/maintenance/archives/patch-page_len.sql new file mode 100644 index 00000000..c32dc8d4 --- /dev/null +++ b/maintenance/archives/patch-page_len.sql @@ -0,0 +1,16 @@ +-- Page length field (in bytes) for current revision of page. +-- Since page text is now stored separately, it may be compressed +-- or otherwise difficult to calculate. Additionally, the field +-- can be indexed for handy 'long' and 'short' page lists. +-- +-- Added 2005-03-12 + +ALTER TABLE /*$wgDBprefix*/page + ADD page_len int(8) unsigned NOT NULL, + ADD INDEX (page_len); + +-- Not accurate if upgrading from intermediate +-- 1.5 alpha and have revision compression on. +UPDATE /*$wgDBprefix*/page, /*$wgDBprefix*/text + SET page_len=LENGTH(old_text) + WHERE page_latest=old_id; diff --git a/maintenance/archives/patch-pagelinks.sql b/maintenance/archives/patch-pagelinks.sql new file mode 100644 index 00000000..7240cff9 --- /dev/null +++ b/maintenance/archives/patch-pagelinks.sql @@ -0,0 +1,56 @@ +-- +-- Create the new pagelinks table to merge links and brokenlinks data, +-- and populate it. +-- +-- Unlike the old links and brokenlinks, these records will not need to be +-- altered when target pages are created, deleted, or renamed. 
This should +-- reduce the amount of severe database frustration that happens when widely- +-- linked pages are altered. +-- +-- Fixups for brokenlinks to pages in namespaces need to be run after this; +-- this is done by updaters.inc if run through the regular update scripts. +-- +-- 2005-05-26 +-- + +-- +-- Track page-to-page hyperlinks within the wiki. +-- +CREATE TABLE /*$wgDBprefix*/pagelinks ( + -- Key to the page_id of the page containing the link. + pl_from int(8) unsigned NOT NULL default '0', + + -- Key to page_namespace/page_title of the target page. + -- The target page may or may not exist, and due to renames + -- and deletions may refer to different page records as time + -- goes by. + pl_namespace int NOT NULL default '0', + pl_title varchar(255) binary NOT NULL default '', + + UNIQUE KEY pl_from(pl_from,pl_namespace,pl_title), + KEY (pl_namespace,pl_title) + +) TYPE=InnoDB; + + +-- Import existing-page links +INSERT + INTO /*$wgDBprefix*/pagelinks (pl_from,pl_namespace,pl_title) + SELECT l_from,page_namespace,page_title + FROM /*$wgDBprefix*/links, /*$wgDBprefix*/page + WHERE l_to=page_id; + +-- import brokenlinks +-- NOTE: We'll have to fix up individual entries that aren't in main NS +INSERT INTO /*$wgDBprefix*/pagelinks (pl_from,pl_namespace,pl_title) + SELECT bl_from, 0, bl_to + FROM /*$wgDBprefix*/brokenlinks; + +-- For each namespace do something like: +-- +-- UPDATE /*$wgDBprefix*/pagelinks +-- SET pl_namespace=$ns, +-- pl_title=TRIM(LEADING '$prefix:' FROM pl_title) +-- WHERE pl_namespace=0 +-- AND pl_title LIKE '$likeprefix:%'"; +-- diff --git a/maintenance/archives/patch-parsercache.sql b/maintenance/archives/patch-parsercache.sql new file mode 100644 index 00000000..854e6c57 --- /dev/null +++ b/maintenance/archives/patch-parsercache.sql @@ -0,0 +1,15 @@ +-- +-- parsercache table, for cacheing complete parsed articles +-- before they are imbedded in the skin. 
+-- + +CREATE TABLE /*$wgDBprefix*/parsercache ( + pc_pageid INT(11) NOT NULL, + pc_title VARCHAR(255) NOT NULL, + pc_prefhash CHAR(32) NOT NULL, + pc_expire DATETIME NOT NULL, + pc_data MEDIUMBLOB NOT NULL, + PRIMARY KEY (pc_pageid, pc_prefhash), + KEY(pc_title), + KEY(pc_expire) +) TYPE=InnoDB; diff --git a/maintenance/archives/patch-profiling.sql b/maintenance/archives/patch-profiling.sql new file mode 100644 index 00000000..49b488e9 --- /dev/null +++ b/maintenance/archives/patch-profiling.sql @@ -0,0 +1,10 @@ +-- profiling table +-- This is optional + +CREATE TABLE /*$wgDBprefix*/profiling ( + pf_count integer not null default 0, + pf_time float not null default 0, + pf_name varchar(255) not null default '', + pf_server varchar(30) not null default '', + UNIQUE KEY pf_name_server (pf_name, pf_server) +) TYPE=HEAP; diff --git a/maintenance/archives/patch-querycache.sql b/maintenance/archives/patch-querycache.sql new file mode 100644 index 00000000..7df9129e --- /dev/null +++ b/maintenance/archives/patch-querycache.sql @@ -0,0 +1,16 @@ +-- Used for caching expensive grouped queries + +CREATE TABLE /*$wgDBprefix*/querycache ( + -- A key name, generally the base name of the special page. + qc_type char(32) NOT NULL, + + -- Some sort of stored value. Sizes, counts... 
+ qc_value int(5) unsigned NOT NULL default '0', + + -- Target namespace+title + qc_namespace int NOT NULL default '0', + qc_title char(255) binary NOT NULL default '', + + KEY (qc_type,qc_value) + +) TYPE=InnoDB; diff --git a/maintenance/archives/patch-querycacheinfo.sql b/maintenance/archives/patch-querycacheinfo.sql new file mode 100644 index 00000000..0e34b3a5 --- /dev/null +++ b/maintenance/archives/patch-querycacheinfo.sql @@ -0,0 +1,12 @@ +CREATE TABLE /*$wgDBprefix*/querycache_info ( + + -- Special page name + -- Corresponds to a qc_type value + qci_type varchar(32) NOT NULL default '', + + -- Timestamp of last update + qci_timestamp char(14) NOT NULL default '19700101000000', + + UNIQUE KEY ( qci_type ) + +) TYPE=InnoDB; diff --git a/maintenance/archives/patch-random-dateindex.sql b/maintenance/archives/patch-random-dateindex.sql new file mode 100644 index 00000000..5d514cc3 --- /dev/null +++ b/maintenance/archives/patch-random-dateindex.sql @@ -0,0 +1,54 @@ +-- patch-random-dateindex.sql +-- 2003-02-09 +-- +-- This patch does two things: +-- * Adds cur_random column to replace random table +-- (Requires change to SpecialRandom.php) +-- random table no longer needs refilling +-- Note: short-term duplicate results *are* possible, but very unlikely on large wiki +-- +-- * Adds inverse_timestamp columns to cur and old and indexes +-- to allow descending timestamp sort in history, contribs, etc +-- (Requires changes to Article.php, DatabaseFunctions.php, +-- ... ) +-- cur_timestamp inverse_timestamp +-- 99999999999999 - 20030209222556 = 79969790777443 +-- 99999999999999 - 20030211083412 = 79969788916587 +-- +-- We won't need this on MySQL 4; there will be a removal patch later. 
+ +-- Indexes: +-- cur needs (cur_random) for random sort +-- cur and old need (namespace,title,timestamp) index for history,watchlist,rclinked +-- cur and old need (user,timestamp) index for contribs +-- cur and old need (user_text,timestamp) index for contribs + +ALTER TABLE /*$wgDBprefix*/cur + DROP INDEX cur_user, + DROP INDEX cur_user_text, + ADD COLUMN cur_random real unsigned NOT NULL, + ADD COLUMN inverse_timestamp char(14) binary NOT NULL default '', + ADD INDEX (cur_random), + ADD INDEX name_title_timestamp (cur_namespace,cur_title,inverse_timestamp), + ADD INDEX user_timestamp (cur_user,inverse_timestamp), + ADD INDEX usertext_timestamp (cur_user_text,inverse_timestamp); + +UPDATE /*$wgDBprefix*/cur SET + inverse_timestamp=99999999999999-cur_timestamp, + cur_random=RAND(); + +ALTER TABLE /*$wgDBprefix*/old + DROP INDEX old_user, + DROP INDEX old_user_text, + ADD COLUMN inverse_timestamp char(14) binary NOT NULL default '', + ADD INDEX name_title_timestamp (old_namespace,old_title,inverse_timestamp), + ADD INDEX user_timestamp (old_user,inverse_timestamp), + ADD INDEX usertext_timestamp (old_user_text,inverse_timestamp); + +UPDATE /*$wgDBprefix*/old SET + inverse_timestamp=99999999999999-old_timestamp; + +-- If leaving wiki publicly accessible in read-only mode during +-- the upgrade, comment out the below line; leave 'random' table +-- in place until the new software is installed. 
+DROP TABLE /*$wgDBprefix*/random; diff --git a/maintenance/archives/patch-rc-newindex.sql b/maintenance/archives/patch-rc-newindex.sql new file mode 100644 index 00000000..2315ff37 --- /dev/null +++ b/maintenance/archives/patch-rc-newindex.sql @@ -0,0 +1,9 @@ +-- +-- patch-rc-newindex.sql +-- Adds an index to recentchanges to optimize Special:Newpages +-- 2004-01-25 +-- + +ALTER TABLE /*$wgDBprefix*/recentchanges + ADD INDEX new_name_timestamp(rc_new,rc_namespace,rc_timestamp); + diff --git a/maintenance/archives/patch-rc-patrol.sql b/maintenance/archives/patch-rc-patrol.sql new file mode 100644 index 00000000..1839c1ee --- /dev/null +++ b/maintenance/archives/patch-rc-patrol.sql @@ -0,0 +1,9 @@ +-- +-- patch-rc-patrol.sql +-- Adds a column to recentchanges for the patrolling feature +-- 2004-08-09 +-- + +ALTER TABLE /*$wgDBprefix*/recentchanges + ADD COLUMN rc_patrolled tinyint(3) unsigned NOT NULL default '0'; + diff --git a/maintenance/archives/patch-rc_id.sql b/maintenance/archives/patch-rc_id.sql new file mode 100644 index 00000000..6dd9ef4a --- /dev/null +++ b/maintenance/archives/patch-rc_id.sql @@ -0,0 +1,7 @@ +-- Primary key in recentchanges + +ALTER TABLE /*$wgDBprefix*/recentchanges + ADD rc_id int(8) NOT NULL auto_increment, + ADD PRIMARY KEY rc_id (rc_id); + + diff --git a/maintenance/archives/patch-rc_ip.sql b/maintenance/archives/patch-rc_ip.sql new file mode 100644 index 00000000..a68a22cb --- /dev/null +++ b/maintenance/archives/patch-rc_ip.sql @@ -0,0 +1,7 @@ +-- Adding the rc_ip field for logging of IP addresses in recentchanges + +ALTER TABLE /*$wgDBprefix*/recentchanges + ADD rc_ip char(15) NOT NULL default '', + ADD INDEX rc_ip (rc_ip); + + diff --git a/maintenance/archives/patch-rc_type.sql b/maintenance/archives/patch-rc_type.sql new file mode 100644 index 00000000..1097771b --- /dev/null +++ b/maintenance/archives/patch-rc_type.sql @@ -0,0 +1,9 @@ +-- recentchanges improvements -- + +ALTER TABLE /*$wgDBprefix*/recentchanges + ADD rc_type 
tinyint(3) unsigned NOT NULL default '0', + ADD rc_moved_to_ns tinyint(3) unsigned NOT NULL default '0', + ADD rc_moved_to_title varchar(255) binary NOT NULL default ''; + +UPDATE /*$wgDBprefix*/recentchanges SET rc_type=1 WHERE rc_new; +UPDATE /*$wgDBprefix*/recentchanges SET rc_type=3 WHERE rc_namespace=4 AND (rc_title='Deletion_log' OR rc_title='Upload_log'); diff --git a/maintenance/archives/patch-rename-group.sql b/maintenance/archives/patch-rename-group.sql new file mode 100644 index 00000000..026b60bd --- /dev/null +++ b/maintenance/archives/patch-rename-group.sql @@ -0,0 +1,10 @@ +-- Rename group table to groups, which is not a keyword +-- It was called group in a few alpha versions + +RENAME TABLE /*$wgDBprefix*/`group` TO /*$wgDBprefix*/groups; +ALTER TABLE /*$wgDBprefix*/groups + CHANGE group_id gr_id int(5) unsigned NOT NULL auto_increment, + CHANGE group_name gr_name varchar(50) NOT NULL default '', + CHANGE group_description gr_description varchar(255) NOT NULL default '', + CHANGE group_rights gr_rights tinyblob; + diff --git a/maintenance/archives/patch-rename-user_groups-and_rights.sql b/maintenance/archives/patch-rename-user_groups-and_rights.sql new file mode 100644 index 00000000..abd59319 --- /dev/null +++ b/maintenance/archives/patch-rename-user_groups-and_rights.sql @@ -0,0 +1,9 @@ + +ALTER TABLE /*$wgDBprefix*/user_groups + CHANGE user_id ug_user INT(5) UNSIGNED NOT NULL DEFAULT '0', + CHANGE group_id ug_group INT(5) UNSIGNED NOT NULL DEFAULT '0'; + +ALTER TABLE /*$wgDBprefix*/user_rights + CHANGE user_id ur_user INT(5) UNSIGNED NOT NULL, + CHANGE user_rights ur_rights TINYBLOB NOT NULL DEFAULT ''; + diff --git a/maintenance/archives/patch-restructure.sql b/maintenance/archives/patch-restructure.sql new file mode 100644 index 00000000..53f1836b --- /dev/null +++ b/maintenance/archives/patch-restructure.sql @@ -0,0 +1,147 @@ +-- The Great Restructuring of October 2004 +-- Creates 'page', 'revision' tables and transforms the classic +-- 
cur+old into a separate page+revision+text structure. +-- +-- The pre-conversion 'old' table is renamed to 'text' and used +-- without internal restructuring to avoid rebuilding the entire +-- table. (This can be done separately if desired.) +-- +-- The pre-conversion 'cur' table is now redundant and can be +-- discarded when done. + +CREATE TABLE /*$wgDBprefix*/page ( + page_id int(8) unsigned NOT NULL auto_increment, + page_namespace tinyint NOT NULL, + page_title varchar(255) binary NOT NULL, + page_restrictions tinyblob NOT NULL default '', + page_counter bigint(20) unsigned NOT NULL default '0', + page_is_redirect tinyint(1) unsigned NOT NULL default '0', + page_is_new tinyint(1) unsigned NOT NULL default '0', + page_random real unsigned NOT NULL, + page_touched char(14) binary NOT NULL default '', + page_latest int(8) unsigned NOT NULL, + page_len int(8) unsigned NOT NULL, + + PRIMARY KEY page_id (page_id), + UNIQUE INDEX name_title (page_namespace,page_title), + INDEX (page_random), + INDEX (page_len) +); + +CREATE TABLE /*$wgDBprefix*/revision ( + rev_id int(8) unsigned NOT NULL auto_increment, + rev_page int(8) unsigned NOT NULL, + rev_comment tinyblob NOT NULL default '', + rev_user int(5) unsigned NOT NULL default '0', + rev_user_text varchar(255) binary NOT NULL default '', + rev_timestamp char(14) binary NOT NULL default '', + rev_minor_edit tinyint(1) unsigned NOT NULL default '0', + rev_deleted tinyint(1) unsigned NOT NULL default '0', + + + PRIMARY KEY rev_page_id (rev_page, rev_id), + UNIQUE INDEX rev_id (rev_id), + INDEX rev_timestamp (rev_timestamp), + INDEX page_timestamp (rev_page,rev_timestamp), + INDEX user_timestamp (rev_user,rev_timestamp), + INDEX usertext_timestamp (rev_user_text,rev_timestamp) +); + +-- If creating new 'text' table it would look like this: +-- +-- CREATE TABLE /*$wgDBprefix*/text ( +-- old_id int(8) unsigned NOT NULL auto_increment, +-- old_text mediumtext NOT NULL default '', +-- old_flags tinyblob NOT NULL default '', 
+-- +-- PRIMARY KEY old_id (old_id) +-- ); + + +-- Lock! +LOCK TABLES /*$wgDBprefix*/page WRITE, /*$wgDBprefix*/revision WRITE, /*$wgDBprefix*/old WRITE, /*$wgDBprefix*/cur WRITE; + +-- Save the last old_id value for later +SELECT (@maxold:=MAX(old_id)) FROM /*$wgDBprefix*/old; + +-- First, copy all current entries into the old table. +INSERT + INTO /*$wgDBprefix*/old + (old_namespace, + old_title, + old_text, + old_comment, + old_user, + old_user_text, + old_timestamp, + old_minor_edit, + old_flags) + SELECT + cur_namespace, + cur_title, + cur_text, + cur_comment, + cur_user, + cur_user_text, + cur_timestamp, + cur_minor_edit, + '' + FROM /*$wgDBprefix*/cur; + +-- Now, copy all old data except the text into revisions +INSERT + INTO /*$wgDBprefix*/revision + (rev_id, + rev_page, + rev_comment, + rev_user, + rev_user_text, + rev_timestamp, + rev_minor_edit) + SELECT + old_id, + cur_id, + old_comment, + old_user, + old_user_text, + old_timestamp, + old_minor_edit + FROM /*$wgDBprefix*/old,/*$wgDBprefix*/cur + WHERE old_namespace=cur_namespace + AND old_title=cur_title; + +-- And, copy the cur data into page +INSERT + INTO /*$wgDBprefix*/page + (page_id, + page_namespace, + page_title, + page_restrictions, + page_counter, + page_is_redirect, + page_is_new, + page_random, + page_touched, + page_latest) + SELECT + cur_id, + cur_namespace, + cur_title, + cur_restrictions, + cur_counter, + cur_is_redirect, + cur_is_new, + cur_random, + cur_touched, + rev_id + FROM /*$wgDBprefix*/cur,/*$wgDBprefix*/revision + WHERE cur_id=rev_page + AND rev_timestamp=cur_timestamp + AND rev_id > @maxold; + +UNLOCK TABLES; + +-- Keep the old table around as the text store. +-- Its extra fields will be ignored, but trimming them is slow +-- so we won't bother doing it for now. 
+ALTER TABLE /*$wgDBprefix*/old RENAME TO /*$wgDBprefix*/text; diff --git a/maintenance/archives/patch-rev_deleted.sql b/maintenance/archives/patch-rev_deleted.sql new file mode 100644 index 00000000..3af0c1d7 --- /dev/null +++ b/maintenance/archives/patch-rev_deleted.sql @@ -0,0 +1,11 @@ +-- +-- Add rev_deleted flag to revision table. +-- Deleted revisions can thus continue to be listed in history +-- and user contributions, and their text storage doesn't have +-- to be disturbed. +-- +-- 2005-03-31 +-- + +ALTER TABLE /*$wgDBprefix*/revision + ADD rev_deleted tinyint(1) unsigned NOT NULL default '0'; diff --git a/maintenance/archives/patch-rev_text_id.sql b/maintenance/archives/patch-rev_text_id.sql new file mode 100644 index 00000000..44ef438c --- /dev/null +++ b/maintenance/archives/patch-rev_text_id.sql @@ -0,0 +1,17 @@ +-- +-- Adds rev_text_id field to revision table. +-- This is a key to text.old_id, so that revisions can be stored +-- for non-save operations without duplicating text, and so that +-- a back-end storage system can provide its own numbering system +-- if necessary. +-- +-- rev.rev_id and text.old_id are no longer assumed to be the same. +-- +-- 2005-03-28 +-- + +ALTER TABLE /*$wgDBprefix*/revision + ADD rev_text_id int(8) unsigned NOT NULL; + +UPDATE /*$wgDBprefix*/revision + SET rev_text_id=rev_id; diff --git a/maintenance/archives/patch-searchindex.sql b/maintenance/archives/patch-searchindex.sql new file mode 100644 index 00000000..fb54dbbe --- /dev/null +++ b/maintenance/archives/patch-searchindex.sql @@ -0,0 +1,40 @@ +-- Break fulltext search index out to separate table from cur +-- This is being done mainly to allow us to use InnoDB tables +-- for the main db while keeping the MyISAM fulltext index for +-- search. + +-- 2002-12-16, 2003-01-25 Brion VIBBER <brion@pobox.com> + +-- Creating searchindex table... 
+DROP TABLE IF EXISTS /*$wgDBprefix*/searchindex; +CREATE TABLE /*$wgDBprefix*/searchindex ( + -- Key to page_id + si_page int(8) unsigned NOT NULL, + + -- Munged version of title + si_title varchar(255) NOT NULL default '', + + -- Munged version of body text + si_text mediumtext NOT NULL default '', + + UNIQUE KEY (si_page) + +) TYPE=MyISAM; + +-- Copying data into new table... +INSERT INTO /*$wgDBprefix*/searchindex + (si_page,si_title,si_text) + SELECT + cur_id,cur_ind_title,cur_ind_text + FROM /*$wgDBprefix*/cur; + + +-- Creating fulltext index... +ALTER TABLE /*$wgDBprefix*/searchindex + ADD FULLTEXT si_title (si_title), + ADD FULLTEXT si_text (si_text); + +-- Dropping index columns from cur table. +ALTER TABLE /*$wgDBprefix*/cur + DROP COLUMN cur_ind_title, + DROP COLUMN cur_ind_text; diff --git a/maintenance/archives/patch-ss_images.sql b/maintenance/archives/patch-ss_images.sql new file mode 100644 index 00000000..e1950eb6 --- /dev/null +++ b/maintenance/archives/patch-ss_images.sql @@ -0,0 +1,5 @@ +-- More statistics, for version 1.6 + +ALTER TABLE /*$wgDBprefix*/site_stats ADD ss_images int(10) default '0'; +SELECT @images := COUNT(*) FROM /*$wgDBprefix*/image; +UPDATE /*$wgDBprefix*/site_stats SET ss_images=@images; diff --git a/maintenance/archives/patch-ss_total_articles.sql b/maintenance/archives/patch-ss_total_articles.sql new file mode 100644 index 00000000..b4a48cf7 --- /dev/null +++ b/maintenance/archives/patch-ss_total_articles.sql @@ -0,0 +1,6 @@ +-- Faster statistics, as of 1.4.3 + +ALTER TABLE /*$wgDBprefix*/site_stats + ADD ss_total_pages bigint(20) default -1, + ADD ss_users bigint(20) default -1, + ADD ss_admins int(10) default -1; diff --git a/maintenance/archives/patch-templatelinks.sql b/maintenance/archives/patch-templatelinks.sql new file mode 100644 index 00000000..49bd9c5e --- /dev/null +++ b/maintenance/archives/patch-templatelinks.sql @@ -0,0 +1,19 @@ +-- +-- Track template inclusions. 
+-- +CREATE TABLE /*$wgDBprefix*/templatelinks ( + -- Key to the page_id of the page containing the link. + tl_from int(8) unsigned NOT NULL default '0', + + -- Key to page_namespace/page_title of the target page. + -- The target page may or may not exist, and due to renames + -- and deletions may refer to different page records as time + -- goes by. + tl_namespace int NOT NULL default '0', + tl_title varchar(255) binary NOT NULL default '', + + UNIQUE KEY tl_from(tl_from,tl_namespace,tl_title), + KEY (tl_namespace,tl_title) + +) TYPE=InnoDB; + diff --git a/maintenance/archives/patch-trackbacks.sql b/maintenance/archives/patch-trackbacks.sql new file mode 100644 index 00000000..4193d058 --- /dev/null +++ b/maintenance/archives/patch-trackbacks.sql @@ -0,0 +1,10 @@ +CREATE TABLE /*$wgDBprefix*/trackbacks ( + tb_id INTEGER AUTO_INCREMENT PRIMARY KEY, + tb_page INTEGER REFERENCES page(page_id) ON DELETE CASCADE, + tb_title VARCHAR(255) NOT NULL, + tb_url VARCHAR(255) NOT NULL, + tb_ex TEXT, + tb_name VARCHAR(255), + + INDEX (tb_page) +); diff --git a/maintenance/archives/patch-transcache.sql b/maintenance/archives/patch-transcache.sql new file mode 100644 index 00000000..a244bff8 --- /dev/null +++ b/maintenance/archives/patch-transcache.sql @@ -0,0 +1,7 @@ +CREATE TABLE /*$wgDBprefix*/transcache ( + tc_url VARCHAR(255) NOT NULL, + tc_contents TEXT, + tc_time INT NOT NULL, + UNIQUE INDEX tc_url_idx(tc_url) +) TYPE=InnoDB; + diff --git a/maintenance/archives/patch-user-realname.sql b/maintenance/archives/patch-user-realname.sql new file mode 100644 index 00000000..96edaa43 --- /dev/null +++ b/maintenance/archives/patch-user-realname.sql @@ -0,0 +1,5 @@ +-- Add a 'real name' field where users can specify the name they want +-- used for author attribution or other places that real names matter. 
+ +ALTER TABLE /*$wgDBprefix*/user + ADD (user_real_name varchar(255) binary NOT NULL default ''); diff --git a/maintenance/archives/patch-user_email_token.sql b/maintenance/archives/patch-user_email_token.sql new file mode 100644 index 00000000..d4d633b7 --- /dev/null +++ b/maintenance/archives/patch-user_email_token.sql @@ -0,0 +1,12 @@ +-- +-- E-mail confirmation token and expiration timestamp, +-- for verification of e-mail addresses. +-- +-- 2005-04-25 +-- + +ALTER TABLE /*$wgDBprefix*/user + ADD COLUMN user_email_authenticated CHAR(14) BINARY, + ADD COLUMN user_email_token CHAR(32) BINARY, + ADD COLUMN user_email_token_expires CHAR(14) BINARY, + ADD INDEX (user_email_token); diff --git a/maintenance/archives/patch-user_groups.sql b/maintenance/archives/patch-user_groups.sql new file mode 100644 index 00000000..50f99993 --- /dev/null +++ b/maintenance/archives/patch-user_groups.sql @@ -0,0 +1,25 @@ +-- +-- User permissions have been broken out to a separate table; +-- this allows sites with a shared user table to have different +-- permissions assigned to a user in each project. +-- +-- This table replaces the old user_rights field which used a +-- comma-separated blob. +-- +CREATE TABLE /*$wgDBprefix*/user_groups ( + -- Key to user_id + ug_user int(5) unsigned NOT NULL default '0', + + -- Group names are short symbolic string keys. + -- The set of group names is open-ended, though in practice + -- only some predefined ones are likely to be used. + -- + -- At runtime $wgGroupPermissions will associate group keys + -- with particular permissions. A user will have the combined + -- permissions of any group they're explicitly in, plus + -- the implicit '*' and 'user' groups. 
+ ug_group char(16) NOT NULL default '', + + PRIMARY KEY (ug_user,ug_group), + KEY (ug_group) +) TYPE=InnoDB; diff --git a/maintenance/archives/patch-user_nameindex.sql b/maintenance/archives/patch-user_nameindex.sql new file mode 100644 index 00000000..9bf0aab1 --- /dev/null +++ b/maintenance/archives/patch-user_nameindex.sql @@ -0,0 +1,13 @@ +-- +-- Change the index on user_name to a unique index to prevent +-- duplicate registrations from creeping in. +-- +-- Run maintenance/userDupes.php or through the updater first +-- to clean up any prior duplicate accounts. +-- +-- Added 2005-06-05 +-- + + ALTER TABLE /*$wgDBprefix*/user + DROP INDEX user_name, +ADD UNIQUE INDEX user_name(user_name); diff --git a/maintenance/archives/patch-user_registration.sql b/maintenance/archives/patch-user_registration.sql new file mode 100644 index 00000000..65fd99df --- /dev/null +++ b/maintenance/archives/patch-user_registration.sql @@ -0,0 +1,9 @@ +-- +-- New user field for tracking registration time +-- 2005-12-21 +-- + +ALTER TABLE /*$wgDBprefix*/user + -- Timestamp of account registration. + -- Accounts predating this schema addition may contain NULL. + ADD user_registration CHAR(14) BINARY; diff --git a/maintenance/archives/patch-user_rights.sql b/maintenance/archives/patch-user_rights.sql new file mode 100644 index 00000000..36f0102a --- /dev/null +++ b/maintenance/archives/patch-user_rights.sql @@ -0,0 +1,21 @@ +-- Split user table into two parts: +-- user +-- user_rights +-- The latter contains only the permissions of the user. This way, +-- you can store the accounts for several wikis in one central +-- database but keep user rights local to the wiki. 
+ +CREATE TABLE /*$wgDBprefix*/user_rights ( + -- Key to user_id + ur_user int(5) unsigned NOT NULL, + + -- Comma-separated list of permission keys + ur_rights tinyblob NOT NULL default '', + + UNIQUE KEY ur_user (ur_user) + +) TYPE=InnoDB; + +INSERT INTO /*$wgDBprefix*/user_rights SELECT user_id,user_rights FROM /*$wgDBprefix*/user; + +ALTER TABLE /*$wgDBprefix*/user DROP COLUMN user_rights; diff --git a/maintenance/archives/patch-user_token.sql b/maintenance/archives/patch-user_token.sql new file mode 100644 index 00000000..797dc98f --- /dev/null +++ b/maintenance/archives/patch-user_token.sql @@ -0,0 +1,15 @@ +-- user_token patch +-- 2004-09-23 + +ALTER TABLE /*$wgDBprefix*/user ADD user_token char(32) binary NOT NULL default ''; + +UPDATE /*$wgDBprefix*/user SET user_token = concat( + substring(rand(),3,4), + substring(rand(),3,4), + substring(rand(),3,4), + substring(rand(),3,4), + substring(rand(),3,4), + substring(rand(),3,4), + substring(rand(),3,4), + substring(rand(),3,4) +); diff --git a/maintenance/archives/patch-userindex.sql b/maintenance/archives/patch-userindex.sql new file mode 100644 index 00000000..c039b2f3 --- /dev/null +++ b/maintenance/archives/patch-userindex.sql @@ -0,0 +1 @@ + ALTER TABLE /*$wgDBprefix*/user ADD INDEX ( `user_name` );
\ No newline at end of file diff --git a/maintenance/archives/patch-userlevels-defaultgroups.sql b/maintenance/archives/patch-userlevels-defaultgroups.sql new file mode 100644 index 00000000..065653da --- /dev/null +++ b/maintenance/archives/patch-userlevels-defaultgroups.sql @@ -0,0 +1,30 @@ +-- +-- Provide default groups +-- Should probably be inserted when someone creates a new database +-- + +INSERT INTO /*$wgDBprefix*/groups (gr_id,gr_name,gr_description,gr_rights) + VALUES ( + 1,':group-anon-name',':group-anon-desc', + 'read,edit,createaccount' + ); +INSERT INTO /*$wgDBprefix*/groups (gr_id,gr_name,gr_description,gr_rights) + VALUES ( + 2,':group-loggedin-name',':group-loggedin-desc', + 'read,edit,move,upload,validate,createaccount' + ); +INSERT INTO /*$wgDBprefix*/groups (gr_id,gr_name,gr_description,gr_rights) + VALUES ( + 3,':group-admin-name',':group-admin-desc', + 'read,edit,move,upload,validate,createaccount,delete,undelete,protect,block,upload,asksql,rollback,patrol,editinterface,import' + ); +INSERT INTO /*$wgDBprefix*/groups (gr_id,gr_name,gr_description,gr_rights) + VALUES ( + 4,':group-bureaucrat-name',':group-bureaucrat-desc', + 'read,edit,move,upload,validate,createaccount,delete,undelete,protect,block,upload,asksql,rollback,patrol,editinterface,import,makesysop' + ); +INSERT INTO /*$wgDBprefix*/groups (gr_id,gr_name,gr_description,gr_rights) + VALUES ( + 5,':group-steward-name',':group-steward-desc', + 'read,edit,move,upload,validate,createaccount,delete,undelete,protect,block,upload,asksql,rollback,patrol,editinterface,import,makesysop,userrights,grouprights,siteadmin' + ); diff --git a/maintenance/archives/patch-userlevels-rights.sql b/maintenance/archives/patch-userlevels-rights.sql new file mode 100644 index 00000000..7f1cabfc --- /dev/null +++ b/maintenance/archives/patch-userlevels-rights.sql @@ -0,0 +1,5 @@ +-- Oct. 
24 2004 +-- Adds the gr_rights field missing from early dev work + +-- Hold group name and description +ALTER TABLE /*$wgDBprefix*/groups ADD gr_rights tinyblob; diff --git a/maintenance/archives/patch-userlevels.sql b/maintenance/archives/patch-userlevels.sql new file mode 100644 index 00000000..ab3a9a7b --- /dev/null +++ b/maintenance/archives/patch-userlevels.sql @@ -0,0 +1,22 @@ +-- Oct. 1st 2004 - Ashar Voultoiz +-- Implement the new sitelevels +-- +-- This is under development to provide a showcase in HEAD :o) + +-- Hold group name and description +CREATE TABLE /*$wgDBprefix*/groups ( + gr_id int(5) unsigned NOT NULL auto_increment, + gr_name varchar(50) NOT NULL default '', + gr_description varchar(255) NOT NULL default '', + gr_rights tinyblob, + PRIMARY KEY (gr_id) + +) TYPE=InnoDB; + +-- Relation table between user and groups +CREATE TABLE /*$wgDBprefix*/user_groups ( + ug_user int(5) unsigned NOT NULL default '0', + ug_group int(5) unsigned NOT NULL default '0', + PRIMARY KEY (ug_user,ug_group) + +) TYPE=InnoDB; diff --git a/maintenance/archives/patch-usernewtalk.sql b/maintenance/archives/patch-usernewtalk.sql new file mode 100644 index 00000000..fb8c8655 --- /dev/null +++ b/maintenance/archives/patch-usernewtalk.sql @@ -0,0 +1,20 @@ +--- This table stores all the IDs of users whose talk +--- page has been changed (the respective row is deleted +--- when the user looks at the page). +--- The respective column in the user table is no longer +--- required and therefore dropped. 
+ +CREATE TABLE /*$wgDBprefix*/user_newtalk ( + user_id int(5) NOT NULL default '0', + user_ip varchar(40) NOT NULL default '', + KEY user_id (user_id), + KEY user_ip (user_ip) +) TYPE=MyISAM; + +INSERT INTO + /*$wgDBprefix*/user_newtalk (user_id, user_ip) + SELECT user_id, '' + FROM /*$wgDBprefix*/user + WHERE user_newtalk != 0; + +ALTER TABLE /*$wgDBprefix*/user DROP COLUMN user_newtalk; diff --git a/maintenance/archives/patch-usernewtalk2.sql b/maintenance/archives/patch-usernewtalk2.sql new file mode 100644 index 00000000..477109b7 --- /dev/null +++ b/maintenance/archives/patch-usernewtalk2.sql @@ -0,0 +1,6 @@ +CREATE TABLE /*$wgDBprefix*/user_newtalk ( + user_id int(5) NOT NULL default '0', + user_ip varchar(40) NOT NULL default '', + INDEX user_id (user_id), + INDEX user_ip (user_ip) +); diff --git a/maintenance/archives/patch-val_ip.sql b/maintenance/archives/patch-val_ip.sql new file mode 100644 index 00000000..9214218d --- /dev/null +++ b/maintenance/archives/patch-val_ip.sql @@ -0,0 +1,4 @@ +-- Column added 2005-05-24 + +ALTER TABLE /*$wgDBprefix*/validate + ADD COLUMN val_ip varchar(20) NOT NULL default ''; diff --git a/maintenance/archives/patch-validate.sql b/maintenance/archives/patch-validate.sql new file mode 100644 index 00000000..3fa7e844 --- /dev/null +++ b/maintenance/archives/patch-validate.sql @@ -0,0 +1,13 @@ +-- For article validation + +DROP TABLE IF EXISTS /*$wgDBprefix*/validate; +CREATE TABLE /*$wgDBprefix*/validate ( + `val_user` int(11) NOT NULL default '0', + `val_page` int(11) unsigned NOT NULL default '0', + `val_revision` int(11) unsigned NOT NULL default '0', + `val_type` int(11) unsigned NOT NULL default '0', + `val_value` int(11) default '0', + `val_comment` varchar(255) NOT NULL default '', + `val_ip` varchar(20) NOT NULL default '', + KEY `val_user` (`val_user`,`val_revision`) +) TYPE=InnoDB; diff --git a/maintenance/archives/patch-watchlist-null.sql b/maintenance/archives/patch-watchlist-null.sql new file mode 100644 index 
00000000..37ffc163 --- /dev/null +++ b/maintenance/archives/patch-watchlist-null.sql @@ -0,0 +1,9 @@ +-- Set up wl_notificationtimestamp with NULL support. +-- 2005-08-17 + +ALTER TABLE /*$wgDBprefix*/watchlist + CHANGE wl_notificationtimestamp wl_notificationtimestamp varchar(14) binary; + +UPDATE /*$wgDBprefix*/watchlist + SET wl_notificationtimestamp=NULL + WHERE wl_notificationtimestamp='0'; diff --git a/maintenance/archives/patch-watchlist.sql b/maintenance/archives/patch-watchlist.sql new file mode 100644 index 00000000..adee010b --- /dev/null +++ b/maintenance/archives/patch-watchlist.sql @@ -0,0 +1,30 @@ +-- Convert watchlists to new new format ;) + +-- Ids just aren't convenient when what we want is to +-- treat article and talk pages as equivalent. +-- Better to use namespace (drop the 1 bit!) and title + +-- 2002-12-17 by Brion Vibber <brion@pobox.com> +-- affects, affected by changes to SpecialWatchlist.php, User.php, +-- Article.php, Title.php, SpecialRecentchanges.php + +DROP TABLE IF EXISTS watchlist2; +CREATE TABLE watchlist2 ( + wl_user int(5) unsigned NOT NULL, + wl_namespace tinyint(2) unsigned NOT NULL default '0', + wl_title varchar(255) binary NOT NULL default '', + UNIQUE KEY (wl_user, wl_namespace, wl_title) +) TYPE=MyISAM PACK_KEYS=1; + +INSERT INTO watchlist2 (wl_user,wl_namespace,wl_title) + SELECT DISTINCT wl_user,(cur_namespace | 1) - 1,cur_title + FROM watchlist,cur WHERE wl_page=cur_id; + +ALTER TABLE watchlist RENAME TO oldwatchlist; +ALTER TABLE watchlist2 RENAME TO watchlist; + +-- Check that the new one is correct, then: +-- DROP TABLE oldwatchlist; + +-- Also should probably drop the ancient and now unused: +ALTER TABLE user DROP COLUMN user_watch; diff --git a/maintenance/archives/rebuildRecentchanges.inc b/maintenance/archives/rebuildRecentchanges.inc new file mode 100644 index 00000000..54f6cb38 --- /dev/null +++ b/maintenance/archives/rebuildRecentchanges.inc @@ -0,0 +1,122 @@ +<?php +/** + * Rebuild recent changes table + * 
+ * @deprecated + * @package MediaWiki + * @subpackage MaintenanceArchive + */ + +/** */ +function rebuildRecentChangesTable() +{ + $sql = "DROP TABLE IF EXISTS recentchanges"; + wfQuery( $sql ); + + $sql = "CREATE TABLE recentchanges ( + rc_timestamp varchar(14) binary NOT NULL default '', + rc_cur_time varchar(14) binary NOT NULL default '', + rc_user int(10) unsigned NOT NULL default '0', + rc_user_text varchar(255) binary NOT NULL default '', + rc_namespace tinyint(3) unsigned NOT NULL default '0', + rc_title varchar(255) binary NOT NULL default '', + rc_comment varchar(255) binary NOT NULL default '', + rc_minor tinyint(3) unsigned NOT NULL default '0', + rc_new tinyint(3) unsigned NOT NULL default '0', + rc_cur_id int(10) unsigned NOT NULL default '0', + rc_this_oldid int(10) unsigned NOT NULL default '0', + rc_last_oldid int(10) unsigned NOT NULL default '0', + INDEX rc_cur_id (rc_cur_id), + INDEX rc_cur_time (rc_cur_time), + INDEX rc_timestamp (rc_timestamp), + INDEX rc_namespace (rc_namespace), + INDEX rc_title (rc_title) +) TYPE=MyISAM PACK_KEYS=1;"; + wfQuery( $sql ); + + print( "Loading from CUR table...\n" ); + + $sql = "INSERT INTO recentchanges (rc_timestamp,rc_cur_time,rc_user," . + "rc_user_text,rc_namespace,rc_title,rc_comment,rc_minor,rc_new," . + "rc_cur_id,rc_this_oldid,rc_last_oldid) SELECT cur_timestamp," . + "cur_timestamp,cur_user,cur_user_text,cur_namespace,cur_title," . + "cur_comment,cur_minor_edit,cur_is_new,cur_id,0,0 FROM cur " . + "ORDER BY cur_timestamp DESC LIMIT 5000"; + wfQuery( $sql ); + + print( "Loading from OLD table...\n" ); + + $sql = "INSERT INTO recentchanges (rc_timestamp,rc_cur_time,rc_user," . + "rc_user_text,rc_namespace,rc_title,rc_comment,rc_minor,rc_new," . + "rc_cur_id,rc_this_oldid,rc_last_oldid) SELECT old_timestamp,''," . + "old_user,old_user_text,old_namespace,old_title,old_comment," . + "old_minor_edit,0,0,old_id,0 FROM old ORDER BY old_timestamp " . 
+ "DESC LIMIT 5000"; + wfQuery( $sql ); + + $sql = "SELECT rc_timestamp FROM recentchanges " . + "ORDER BY rc_timestamp DESC LIMIT 5000,1"; + $res = wfQuery( $sql ); + $obj = wfFetchObject( $res ); + $ts = $obj->rc_timestamp; + + $sql = "DELETE FROM recentchanges WHERE rc_timestamp < '{$ts}'"; + wfQuery( $sql ); + + rebuildRecentChangesTablePass2(); +} + +function rebuildRecentChangesTablePass2() +{ + $ns = $id = $count = 0; + $title = $ct = ""; + + print( "Updating links...\n" ); + + $sql = "SELECT rc_namespace,rc_title,rc_timestamp FROM recentchanges " . + "ORDER BY rc_namespace,rc_title,rc_timestamp DESC"; + $res = wfQuery( $sql ); + + while ( $obj = wfFetchObject( $res ) ) { + if ( ! ( $ns == $obj->rc_namespace && + 0 == strcmp( $title, wfStrencode( $obj->rc_title ) ) ) ) { + + $ns = $obj->rc_namespace; + $title = wfStrencode( $obj->rc_title ); + + $sql = "SELECT cur_id,cur_timestamp FROM cur WHERE " . + "cur_namespace={$ns} AND cur_title='{$title}'"; + $res2 = wfQuery( $sql ); + $obj2 = wfFetchObject( $res2 ); + + $id = $obj2->cur_id; + $ct = $obj2->cur_timestamp; + } + $sql = "SELECT old_id FROM old WHERE old_namespace={$ns} " . + "AND old_title='{$title}' AND old_timestamp < '" . + "{$obj->rc_timestamp}' ORDER BY old_timestamp DESC LIMIT 1"; + $res2 = wfQuery( $sql ); + + if ( 0 != wfNumRows( $res2 ) ) { + $obj2 = wfFetchObject( $res2 ); + + $sql = "UPDATE recentchanges SET rc_cur_id={$id},rc_cur_time=" . + "'{$ct}',rc_last_oldid={$obj2->old_id} WHERE " . + "rc_namespace={$ns} AND rc_title='{$title}' AND " . + "rc_timestamp='{$obj->rc_timestamp}'"; + wfQuery( $sql ); + } else { + $sql = "UPDATE recentchanges SET rc_cur_id={$id},rc_cur_time=" . + "'{$ct}' WHERE rc_namespace={$ns} AND rc_title='{$title}' " . 
+ "AND rc_timestamp='{$obj->rc_timestamp}'"; + wfQuery( $sql ); + } + + if ( 0 == ( ++$count % 500 ) ) { + printf( "%d records processed.\n", $count ); + } + } +} + + +?> diff --git a/maintenance/archives/upgradeWatchlist.php b/maintenance/archives/upgradeWatchlist.php new file mode 100644 index 00000000..b4605a50 --- /dev/null +++ b/maintenance/archives/upgradeWatchlist.php @@ -0,0 +1,67 @@ +<?php +/** + * @deprecated + * @package MediaWiki + * @subpackage MaintenanceArchive + */ + +/** */ +print "This script is obsolete!"; +print "It is retained in the source here in case some of its +code might be useful for ad-hoc conversion tasks, but it is +not maintained and probably won't even work as is."; +exit(); + +# Convert watchlists to new format + +global $IP; +require_once( "../LocalSettings.php" ); +require_once( "$IP/Setup.php" ); + +$wgTitle = Title::newFromText( "Rebuild links script" ); +set_time_limit(0); + +$wgDBuser = "wikiadmin"; +$wgDBpassword = $wgDBadminpassword; + +$sql = "DROP TABLE IF EXISTS watchlist"; +wfQuery( $sql, DB_MASTER ); +$sql = "CREATE TABLE watchlist ( + wl_user int(5) unsigned NOT NULL, + wl_page int(8) unsigned NOT NULL, + UNIQUE KEY (wl_user, wl_page) +) TYPE=MyISAM PACK_KEYS=1"; +wfQuery( $sql, DB_MASTER ); + +$lc = new LinkCache; + +# Now, convert! +$sql = "SELECT user_id,user_watch FROM user"; +$res = wfQuery( $sql, DB_SLAVE ); +$nu = wfNumRows( $res ); +$sql = "INSERT into watchlist (wl_user,wl_page) VALUES "; +$i = $n = 0; +while( $row = wfFetchObject( $res ) ) { + $list = explode( "\n", $row->user_watch ); + $bits = array(); + foreach( $list as $title ) { + if( $id = $lc->addLink( $title ) and ! $bits[$id]++) { + $sql .= ($i++ ? "," : "") . "({$row->user_id},{$id})"; + } + } + if( ($n++ % 100) == 0 ) echo "$n of $nu users done...\n"; +} +echo "$n users done.\n"; +if( $i ) { + wfQuery( $sql, DB_MASTER ); +} + + +# Add index +# is this necessary? 
+$sql = "ALTER TABLE watchlist + ADD INDEX wl_user (wl_user), + ADD INDEX wl_page (wl_page)"; +#wfQuery( $sql, DB_MASTER ); + +?> diff --git a/maintenance/attachLatest.php b/maintenance/attachLatest.php new file mode 100644 index 00000000..024a4fac --- /dev/null +++ b/maintenance/attachLatest.php @@ -0,0 +1,73 @@ +<?php +// quick hackjob to fix damages imports on wikisource +// page records have page_latest wrong + +/** + * Copyright (C) 2005 Brion Vibber <brion@pobox.com> + * http://www.mediawiki.org/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + * + * @package MediaWiki + * @subpackage Maintenance + */ + +require_once( 'commandLine.inc' ); + +$fixit = isset( $options['fix'] ); +$fname = 'attachLatest'; + +echo "Looking for pages with page_latest set to 0...\n"; +$dbw =& wfGetDB( DB_MASTER ); +$result = $dbw->select( 'page', + array( 'page_id', 'page_namespace', 'page_title' ), + array( 'page_latest' => 0 ), + $fname ); + +$n = 0; +while( $row = $dbw->fetchObject( $result ) ) { + $pageId = intval( $row->page_id ); + $title = Title::makeTitle( $row->page_namespace, $row->page_title ); + $name = $title->getPrefixedText(); + $latestTime = $dbw->selectField( 'revision', + 'MAX(rev_timestamp)', + array( 'rev_page' => $pageId ), + $fname ); + if( !$latestTime ) { + echo "$wgDBname $pageId [[$name]] can't find latest rev time?!\n"; + continue; + } + + $revision = Revision::loadFromTimestamp( $dbw, $title, $latestTime ); + if( is_null( $revision ) ) { + echo "$wgDBname $pageId [[$name]] latest time $latestTime, can't find revision id\n"; + continue; + } + $id = $revision->getId(); + echo "$wgDBname $pageId [[$name]] latest time $latestTime, rev id $id\n"; + if( $fixit ) { + $article = new Article( $title ); + $article->updateRevisionOn( $dbw, $revision ); + } + $n++; +} +$dbw->freeResult( $result ); +echo "Done! 
Processed $n pages.\n"; +if( !$fixit ) { + echo "This was a dry run; rerun with --fix to update page_latest.\n"; +} + +?> diff --git a/maintenance/attribute.php b/maintenance/attribute.php new file mode 100644 index 00000000..3326180c --- /dev/null +++ b/maintenance/attribute.php @@ -0,0 +1,105 @@ +<?php +/** + * Script for re-attributing edits + * + * @package MediaWiki + * @subpackage Maintenance + */ + +/** */ +require_once( "commandLine.inc" ); + +# Parameters +if ( count( $args ) < 2 ) { + print "Not enough parameters\n"; + if ( $wgWikiFarm ) { + print "Usage: php attribute.php <language> <site> <source> <destination>\n"; + } else { + print "Usage: php attribute.php <source> <destination>\n"; + } + exit; +} + +$source = $args[0]; +$dest = $args[1]; + +$dbr =& wfGetDB( DB_SLAVE ); +extract( $dbr->tableNames( 'page', 'revision','user' )); +$eSource = $dbr->strencode( $source ); +$eDest = $dbr->strencode( $dest ); + +# Get user id +$res = $dbr->query( "SELECT user_id FROM $user WHERE user_name='$eDest'" ); +$row = $dbr->fetchObject( $res ); +if ( !$row ) { + print "Warning: the target name \"$dest\" does not exist"; + $uid = 0; +} else { + $uid = $row->user_id; +} + +# Initialise files +$logfile = fopen( "attribute.log", "a" ); +$sqlfile = fopen( "attribute.sql", "a" ); + +fwrite( $logfile, "* $source → $dest\n" ); + +fwrite( $sqlfile, +"-- Changing attribution SQL file +-- Generated with attribute.php +-- $source -> $dest ($uid) +"); + +$omitTitle = "Wikipedia:Changing_attribution_for_an_edit"; + +# Get revisions +print "\nPage revisions\n\n"; + +$res = $dbr->query( "SELECT page_namespace, page_title, rev_id, rev_timestamp +FROM $revision,$page +WHERE rev_user_text='$eSource' and rev_page=page_id" ); +$row = $dbr->fetchObject( $res ); + +if ( $row ) { +/* + if ( $row->old_title=='Votes_for_deletion' && $row->old_namespace == 4 ) { + # We don't have that long + break; + } +*/ + fwrite( $logfile, "**Revision IDs: " ); + fwrite( $sqlfile, "UPDATE $revision SET 
rev_user=$uid, rev_user_text='$eDest' WHERE rev_id IN (\n" ); + + for ( $first=true; $row; $row = $dbr->fetchObject( $res ) ) { + $title = Title::makeTitle( $row->page_namespace, $row->page_title ); + $fullTitle = $title->getPrefixedDbKey(); + if ( $fullTitle == $omitTitle ) { + continue; + } + + print "$fullTitle\n"; + $url = $title->getFullUrl( "oldid={$row->rev_id}" ); + + # Output + fwrite( $sqlfile, " " ); + if ( $first ) { + $first = false; + } else { + fwrite( $sqlfile, ", " ); + fwrite( $logfile, ", " ); + } + + fwrite( $sqlfile, "{$row->rev_id} -- $url\n" ); + fwrite( $logfile, "[$url {$row->rev_id}]" ); + + } + fwrite( $sqlfile, ");\n" ); + fwrite( $logfile, "\n" ); +} + +print "\n"; + +fclose( $sqlfile ); +fclose( $logfile ); + +?> diff --git a/maintenance/backup.inc b/maintenance/backup.inc new file mode 100644 index 00000000..d3603bd1 --- /dev/null +++ b/maintenance/backup.inc @@ -0,0 +1,296 @@ +<?php +/** + * Copyright (C) 2005 Brion Vibber <brion@pobox.com> + * http://www.mediawiki.org/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + * + * @package MediaWiki + * @subpackage SpecialPage + */ + +class DumpDBZip2Output extends DumpPipeOutput { + function DumpDBZip2Output( $file ) { + parent::DumpPipeOutput( "dbzip2", $file ); + } +} + +class BackupDumper { + var $reportingInterval = 100; + var $reporting = true; + var $pageCount = 0; + var $revCount = 0; + var $server = null; // use default + var $pages = null; // all pages + var $skipHeader = false; // don't output <mediawiki> and <siteinfo> + var $skipFooter = false; // don't output </mediawiki> + var $startId = 0; + var $endId = 0; + var $sink = null; // Output filters + var $stubText = false; // include rev_text_id instead of text; for 2-pass dump + + function BackupDumper( $args ) { + $this->stderr = fopen( "php://stderr", "wt" ); + + // Built-in output and filter plugins + $this->registerOutput( 'file', 'DumpFileOutput' ); + $this->registerOutput( 'gzip', 'DumpGZipOutput' ); + $this->registerOutput( 'bzip2', 'DumpBZip2Output' ); + $this->registerOutput( 'dbzip2', 'DumpDBZip2Output' ); + $this->registerOutput( '7zip', 'Dump7ZipOutput' ); + + $this->registerFilter( 'latest', 'DumpLatestFilter' ); + $this->registerFilter( 'notalk', 'DumpNotalkFilter' ); + $this->registerFilter( 'namespace', 'DumpNamespaceFilter' ); + + $this->sink = $this->processArgs( $args ); + } + + /** + * @param string $name + * @param string $class name of output filter plugin class + */ + function registerOutput( $name, $class ) { + $this->outputTypes[$name] = $class; + } + + /** + * @param string $name + * @param string $class name of filter plugin class + */ + function registerFilter( $name, $class ) { + $this->filterTypes[$name] = $class; + } + + /** + * Load a plugin and register it + * @param string $class Name of plugin class; must have a static 'register' + * method that takes a BackupDumper as a parameter. 
+ * @param string $file Full or relative path to the PHP file to load, or empty + */ + function loadPlugin( $class, $file ) { + if( $file != '' ) { + require_once( $file ); + } + $register = array( $class, 'register' ); + call_user_func_array( $register, array( &$this ) ); + } + + /** + * @param array $args + * @return array + * @static + */ + function processArgs( $args ) { + $sink = null; + $sinks = array(); + foreach( $args as $arg ) { + if( preg_match( '/^--(.+?)(?:=(.+?)(?::(.+?))?)?$/', $arg, $matches ) ) { + @list( $full, $opt, $val, $param ) = $matches; + switch( $opt ) { + case "plugin": + $this->loadPlugin( $val, $param ); + break; + case "output": + if( !is_null( $sink ) ) { + $sinks[] = $sink; + } + if( !isset( $this->outputTypes[$val] ) ) { + wfDie( "Unrecognized output sink type '$val'\n" ); + } + $type = $this->outputTypes[$val]; + $sink = new $type( $param ); + break; + case "filter": + if( is_null( $sink ) ) { + $this->progress( "Warning: assuming stdout for filter output\n" ); + $sink = new DumpOutput(); + } + if( !isset( $this->filterTypes[$val] ) ) { + wfDie( "Unrecognized filter type '$val'\n" ); + } + $type = $this->filterTypes[$val]; + $filter = new $type( $sink, $param ); + + // references are lame in php... + unset( $sink ); + $sink = $filter; + + break; + case "report": + $this->reportingInterval = intval( $val ); + break; + case "server": + $this->server = $val; + break; + case "force-normal": + if( !function_exists( 'utf8_normalize' ) ) { + dl( "php_utfnormal.so" ); + if( !function_exists( 'utf8_normalize' ) ) { + wfDie( "Failed to load UTF-8 normalization extension. " . 
+ "Install or remove --force-normal parameter to use slower code.\n" ); + } + } + break; + default: + $this->processOption( $opt, $val, $param ); + } + } + } + + if( is_null( $sink ) ) { + $sink = new DumpOutput(); + } + $sinks[] = $sink; + + if( count( $sinks ) > 1 ) { + return new DumpMultiWriter( $sinks ); + } else { + return $sink; + } + } + + function processOption( $opt, $val, $param ) { + // extension point for subclasses to add options + } + + function dump( $history, $text = MW_EXPORT_TEXT ) { + # This shouldn't happen if on console... ;) + header( 'Content-type: text/html; charset=UTF-8' ); + + # Notice messages will foul up your XML output even if they're + # relatively harmless. + ini_set( 'display_errors', false ); + + $this->initProgress( $history ); + + $db =& $this->backupDb(); + $exporter = new WikiExporter( $db, $history, MW_EXPORT_STREAM, $text ); + + $wrapper = new ExportProgressFilter( $this->sink, $this ); + $exporter->setOutputSink( $wrapper ); + + if( !$this->skipHeader ) + $exporter->openStream(); + + if( is_null( $this->pages ) ) { + if( $this->startId || $this->endId ) { + $exporter->pagesByRange( $this->startId, $this->endId ); + } else { + $exporter->allPages(); + } + } else { + $exporter->pagesByName( $this->pages ); + } + + if( !$this->skipFooter ) + $exporter->closeStream(); + + $this->report( true ); + } + + /** + * Initialise starting time and maximum revision count. + * We'll make ETA calculations based an progress, assuming relatively + * constant per-revision rate. + * @param int $history MW_EXPORT_CURRENT or MW_EXPORT_FULL + */ + function initProgress( $history = MW_EXPORT_FULL ) { + $table = ($history == MW_EXPORT_CURRENT) ? 'page' : 'revision'; + $field = ($history == MW_EXPORT_CURRENT) ? 
'page_id' : 'rev_id'; + + $dbr =& wfGetDB( DB_SLAVE ); + $this->maxCount = $dbr->selectField( $table, "MAX($field)", '', 'BackupDumper::dump' ); + $this->startTime = wfTime(); + } + + function &backupDb() { + global $wgDBadminuser, $wgDBadminpassword; + global $wgDBname, $wgDebugDumpSql; + $flags = ($wgDebugDumpSql ? DBO_DEBUG : 0) | DBO_DEFAULT; // god-damn hack + $db =& new Database( $this->backupServer(), $wgDBadminuser, $wgDBadminpassword, $wgDBname, false, $flags ); + $timeout = 3600 * 24; + $db->query( "SET net_read_timeout=$timeout" ); + $db->query( "SET net_write_timeout=$timeout" ); + return $db; + } + + function backupServer() { + global $wgDBserver; + return $this->server + ? $this->server + : $wgDBserver; + } + + function reportPage() { + $this->pageCount++; + } + + function revCount() { + $this->revCount++; + $this->report(); + } + + function report( $final = false ) { + if( $final xor ( $this->revCount % $this->reportingInterval == 0 ) ) { + $this->showReport(); + } + } + + function showReport() { + if( $this->reporting ) { + $delta = wfTime() - $this->startTime; + $now = wfTimestamp( TS_DB ); + if( $delta ) { + $rate = $this->pageCount / $delta; + $revrate = $this->revCount / $delta; + $portion = $this->revCount / $this->maxCount; + $eta = $this->startTime + $delta / $portion; + $etats = wfTimestamp( TS_DB, intval( $eta ) ); + } else { + $rate = '-'; + $revrate = '-'; + $etats = '-'; + } + global $wgDBname; + $this->progress( sprintf( "%s: %s %d pages (%0.3f/sec), %d revs (%0.3f/sec), ETA %s [max %d]", + $now, $wgDBname, $this->pageCount, $rate, $this->revCount, $revrate, $etats, $this->maxCount ) ); + } + } + + function progress( $string ) { + fwrite( $this->stderr, $string . 
"\n" ); + } +} + +class ExportProgressFilter extends DumpFilter { + function ExportProgressFilter( &$sink, &$progress ) { + parent::DumpFilter( $sink ); + $this->progress = $progress; + } + + function writeClosePage( $string ) { + parent::writeClosePage( $string ); + $this->progress->reportPage(); + } + + function writeRevision( $rev, $string ) { + parent::writeRevision( $rev, $string ); + $this->progress->revCount(); + } +} + +?> diff --git a/maintenance/backupPrefetch.inc b/maintenance/backupPrefetch.inc new file mode 100644 index 00000000..413247d7 --- /dev/null +++ b/maintenance/backupPrefetch.inc @@ -0,0 +1,203 @@ +<?php + +// Some smart guy removed XMLReader's global constants from PHP 5.1 +// and replaced them with class constants. Breaking source compatibility +// is SUPER awesome, and I love languages which do this constantly! +$xmlReaderConstants = array( + "NONE", + "ELEMENT", + "ATTRIBUTE", + "TEXT", + "CDATA", + "ENTITY_REF", + "ENTITY", + "PI", + "COMMENT", + "DOC", + "DOC_TYPE", + "DOC_FRAGMENT", + "NOTATION", + "WHITESPACE", + "SIGNIFICANT_WHITESPACE", + "END_ELEMENT", + "END_ENTITY", + "XML_DECLARATION", + "LOADDTD", + "DEFAULTATTRS", + "VALIDATE", + "SUBST_ENTITIES" ); +foreach( $xmlReaderConstants as $name ) { + $fullName = "XMLREADER_$name"; + $newName = "XMLReader::$name"; + if( !defined( $fullName ) ) { + if( defined( $newName ) ) { + define( $fullName, constant( $newName ) ); + } else { + // broken or missing the extension... + } + } +} + +/** + * Readahead helper for making large MediaWiki data dumps; + * reads in a previous XML dump to sequentially prefetch text + * records already normalized and decompressed. + * + * This can save load on the external database servers, hopefully. 
+ * + * Assumes that dumps will be recorded in the canonical order: + * - ascending by page_id + * - ascending by rev_id within each page + * - text contents are immutable and should not change once + * recorded, so the previous dump is a reliable source + * + * Requires PHP 5 and the XMLReader PECL extension. + */ +class BaseDump { + var $reader = null; + var $atEnd = false; + var $atPageEnd = false; + var $lastPage = 0; + var $lastRev = 0; + + function BaseDump( $infile ) { + $this->reader = new XMLReader(); + $this->reader->open( $infile ); + } + + /** + * Attempts to fetch the text of a particular page revision + * from the dump stream. May return null if the page is + * unavailable. + * + * @param int $page ID number of page to read + * @param int $rev ID number of revision to read + * @return string or null + */ + function prefetch( $page, $rev ) { + $page = intval( $page ); + $rev = intval( $rev ); + while( $this->lastPage < $page && !$this->atEnd ) { + $this->debug( "BaseDump::prefetch at page $this->lastPage, looking for $page" ); + $this->nextPage(); + } + if( $this->lastPage > $page || $this->atEnd ) { + $this->debug( "BaseDump::prefetch already past page $page looking for rev $rev [$this->lastPage, $this->lastRev]" ); + return null; + } + while( $this->lastRev < $rev && !$this->atEnd && !$this->atPageEnd ) { + $this->debug( "BaseDump::prefetch at page $this->lastPage, rev $this->lastRev, looking for $page, $rev" ); + $this->nextRev(); + } + if( $this->lastRev == $rev && !$this->atEnd ) { + $this->debug( "BaseDump::prefetch hit on $page, $rev [$this->lastPage, $this->lastRev]" ); + return $this->nextText(); + } else { + $this->debug( "BaseDump::prefetch already past rev $rev on page $page [$this->lastPage, $this->lastRev]" ); + return null; + } + } + + function debug( $str ) { + wfDebug( $str . 
"\n" ); + //global $dumper; + //$dumper->progress( $str ); + } + + /** + * @access private + */ + function nextPage() { + if( $this->skipTo( 'page', 'mediawiki' ) ) { + if( $this->skipTo( 'id' ) ) { + $this->lastPage = intval( $this->nodeContents() ); + $this->lastRev = 0; + $this->atPageEnd = false; + } + } else { + $this->atEnd = true; + } + } + + /** + * @access private + */ + function nextRev() { + if( $this->skipTo( 'revision' ) ) { + if( $this->skipTo( 'id' ) ) { + $this->lastRev = intval( $this->nodeContents() ); + } + } else { + $this->atPageEnd = true; + } + } + + /** + * @access private + */ + function nextText() { + $this->skipTo( 'text' ); + return strval( $this->nodeContents() ); + } + + /** + * @access private + */ + function skipTo( $name, $parent='page' ) { + if( $this->atEnd ) { + return false; + } + while( $this->reader->read() ) { + if( $this->reader->nodeType == XMLREADER_ELEMENT && + $this->reader->name == $name ) { + return true; + } + if( $this->reader->nodeType == XMLREADER_END_ELEMENT && + $this->reader->name == $parent ) { + $this->debug( "BaseDump::skipTo found </$parent> searching for <$name>" ); + return false; + } + } + return $this->close(); + } + + /** + * Shouldn't something like this be built-in to XMLReader? + * Fetches text contents of the current element, assuming + * no sub-elements or such scary things. 
+ * @return string + * @access private + */ + function nodeContents() { + if( $this->atEnd ) { + return null; + } + if( $this->reader->isEmptyElement ) { + return ""; + } + $buffer = ""; + while( $this->reader->read() ) { + switch( $this->reader->nodeType ) { + case XMLREADER_TEXT: +// case XMLREADER_WHITESPACE: + case XMLREADER_SIGNIFICANT_WHITESPACE: + $buffer .= $this->reader->value; + break; + case XMLREADER_END_ELEMENT: + return $buffer; + } + } + return $this->close(); + } + + /** + * @access private + */ + function close() { + $this->reader->close(); + $this->atEnd = true; + return null; + } +} + +?> diff --git a/maintenance/benchmarkPurge.php b/maintenance/benchmarkPurge.php new file mode 100644 index 00000000..69127681 --- /dev/null +++ b/maintenance/benchmarkPurge.php @@ -0,0 +1,65 @@ +<?php +/** + * Squid purge benchmark script + * @package MediaWiki + * @subpackage Maintenance + */ + +/** */ +require_once( "commandLine.inc" ); + +/** @todo document */ +function benchSquid( $urls, $trials = 1 ) { + $start = wfTime(); + for( $i = 0; $i < $trials; $i++) { + SquidUpdate::purge( $urls ); + } + $delta = wfTime() - $start; + $pertrial = $delta / $trials; + $pertitle = $pertrial / count( $urls ); + return sprintf( "%4d titles in %6.2fms (%6.2fms each)", + count( $urls ), $pertrial * 1000.0, $pertitle * 1000.0 ); +} + +/** @todo document */ +function randomUrlList( $length ) { + $list = array(); + for( $i = 0; $i < $length; $i++ ) { + $list[] = randomUrl(); + } + return $list; +} + +/** @todo document */ +function randomUrl() { + global $wgServer, $wgArticlePath; + return $wgServer . 
str_replace( '$1', randomTitle(), $wgArticlePath ); +} + +/** @todo document */ +function randomTitle() { + $str = ''; + $length = mt_rand( 1, 20 ); + for( $i = 0; $i < $length; $i++ ) { + $str .= chr( mt_rand( ord('a'), ord('z') ) ); + } + return ucfirst( $str ); +} + +if( !$wgUseSquid ) { + wfDie( "Squid purge benchmark doesn't do much without squid support on.\n" ); +} else { + printf( "There are %d defined squid servers:\n", count( $wgSquidServers ) ); + #echo implode( "\n", $wgSquidServers ) . "\n"; + if( isset( $options['count'] ) ) { + $lengths = array( intval( $options['count'] ) ); + } else { + $lengths = array( 1, 10, 100 ); + } + foreach( $lengths as $length ) { + $urls = randomUrlList( $length ); + $trial = benchSquid( $urls ); + print "$trial\n"; + } +} +?>
\ No newline at end of file diff --git a/maintenance/build-intl-wiki.sql b/maintenance/build-intl-wiki.sql new file mode 100644 index 00000000..f094c8b7 --- /dev/null +++ b/maintenance/build-intl-wiki.sql @@ -0,0 +1,31 @@ +-- Experimental: create shared international database +-- for new interlinking code. +-- + +CREATE DATABASE intl; + +GRANT DELETE,INSERT,SELECT,UPDATE ON intl.* +TO wikiuser@'%' IDENTIFIED BY 'userpass'; +GRANT DELETE,INSERT,SELECT,UPDATE ON intl.* +TO wikiuser@localhost IDENTIFIED BY 'userpass'; +GRANT DELETE,INSERT,SELECT,UPDATE ON intl.* +TO wikiuser@localhost.localdomain IDENTIFIED BY 'userpass'; + +USE intl; + +CREATE TABLE ilinks ( + lang_from varchar(5) default NULL, + lang_to varchar(5) default NULL, + title_from tinyblob, + title_to tinyblob, + target_exists tinyint(1) default NULL +) TYPE=MyISAM; + +CREATE TABLE recentchanges ( + user_name tinyblob, + user_lang varchar(5) default NULL, + date timestamp(14) NOT NULL, + message tinyblob +) TYPE=MyISAM; + + diff --git a/maintenance/changePassword.php b/maintenance/changePassword.php new file mode 100644 index 00000000..591a82b3 --- /dev/null +++ b/maintenance/changePassword.php @@ -0,0 +1,53 @@ +<?php +/** + * Change the password of a given user + * + * @package MediaWiki + * @subpackage Maintenance + * + * @author Ævar Arnfjörð Bjarmason <avarab@gmail.com> + * @copyright Copyright © 2005, Ævar Arnfjörð Bjarmason + * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later + */ + +class ChangePassword { + var $dbw; + var $user, $password; + + function ChangePassword( $user, $password ) { + $this->user = User::newFromName( $user ); + $this->password = $password; + + $this->dbw =& wfGetDB( DB_MASTER ); + } + + function main() { + $fname = 'ChangePassword::main'; + + $this->dbw->update( 'user', + array( + 'user_password' => wfEncryptPassword( $this->user->getID(), $this->password ) + ), + array( + 'user_id' => $this->user->getID() + ), + $fname + ); + } +} + 
+$optionsWithArgs = array( 'user', 'password' ); +require_once 'commandLine.inc'; + +if( in_array( '--help', $argv ) ) + wfDie( + "Usage: php changePassword.php [--user=user --password=password | --help]\n" . + "\toptions:\n" . + "\t\t--help\tshow this message\n" . + "\t\t--user\tthe username to operate on\n" . + "\t\t--password\tthe password to use\n" + ); + +$cp = new ChangePassword( @$options['user'], @$options['password'] ); +$cp->main(); +?> diff --git a/maintenance/changeuser.sql b/maintenance/changeuser.sql new file mode 100644 index 00000000..ad1c6da6 --- /dev/null +++ b/maintenance/changeuser.sql @@ -0,0 +1,12 @@ +set @oldname = 'At18'; +set @newname = 'Alfio'; + +update low_priority /*$wgDBprefix*/user set user_name=@newname where user_name=@oldname; +update low_priority /*$wgDBprefix*/user_newtalk set user_ip=@newname where user_ip=@oldname; +update low_priority /*$wgDBprefix*/cur set cur_user_text=@newname where cur_user_text=@oldname; +update low_priority /*$wgDBprefix*/old set old_user_text=@newname where old_user_text=@oldname; +update low_priority /*$wgDBprefix*/archive set ar_user_text=@newname where ar_user_text=@oldname; +update low_priority /*$wgDBprefix*/ipblocks set ipb_address=@newname where ipb_address=@oldname; +update low_priority /*$wgDBprefix*/oldimage set oi_user_text=@newname where oi_user_text=@oldname; +update low_priority /*$wgDBprefix*/recentchanges set rc_user_text=@newname where rc_user_text=@oldname; + diff --git a/maintenance/checkUsernames.php b/maintenance/checkUsernames.php new file mode 100644 index 00000000..b577ebc6 --- /dev/null +++ b/maintenance/checkUsernames.php @@ -0,0 +1,37 @@ +<?php +error_reporting(E_ALL ^ E_NOTICE); +require_once 'commandLine.inc'; + +class checkUsernames { + var $stderr, $log; + + function checkUsernames() { + $this->stderr = fopen( 'php://stderr', 'wt' ); + $this->log = fopen( '/home/wikipedia/logs/checkUsernames.log', 'at' ); + } + function main() { + global $wgDBname; + $fname = 
'checkUsernames::main'; + + $dbr =& wfGetDB( DB_SLAVE ); + + $res = $dbr->select( 'user', + array( 'user_id', 'user_name' ), + null, + $fname + ); + + #fwrite( $this->stderr, "Checking $wgDBname\n" ); + while ( $row = $dbr->fetchObject( $res ) ) { + if ( ! User::isValidUserName( $row->user_name ) ) { + $out = sprintf( "%s: %6d: '%s'\n", $wgDBname, $row->user_id, $row->user_name ); + fwrite( $this->stderr, $out ); + fwrite( $this->log, $out ); + } + } + } +} + +$cun = new checkUsernames(); +$cun->main(); +?> diff --git a/maintenance/checktrans.php b/maintenance/checktrans.php new file mode 100644 index 00000000..ebab4c7d --- /dev/null +++ b/maintenance/checktrans.php @@ -0,0 +1,30 @@ +<?php +/** + * @package MediaWiki + * @subpackage Maintenance + * Check to see if all messages have been translated into the selected language. + * To run this script, you must have a working installation, and it checks the + * selected language of that installation. + */ + +/** */ +require_once('commandLine.inc'); + +if ( 'en' == $wgLanguageCode ) { + print "Current selected language is English. Cannot check translations.\n"; + exit(); +} + +$count = $total = 0; +$msgarray = 'wgAllMessages' . ucfirst( $wgLanguageCode ); + +foreach ( $wgAllMessagesEn as $code => $msg ) { + ++$total; + if ( ! array_key_exists( $code, $$msgarray ) ) { + print "'{$code}' => \"$msg\",\n"; + ++$count; + } +} + +print "{$count} messages of {$total} not translated.\n"; +?> diff --git a/maintenance/cleanupCaps.php b/maintenance/cleanupCaps.php new file mode 100644 index 00000000..afcd1b33 --- /dev/null +++ b/maintenance/cleanupCaps.php @@ -0,0 +1,158 @@ +<?php +/* + * Script to clean up broken page links when somebody turns on $wgCapitalLinks. 
+ * + * Usage: php cleanupCaps.php [--dry-run] + * Options: + * --dry-run don't actually try moving them + * + * Copyright (C) 2005 Brion Vibber <brion@pobox.com> + * http://www.mediawiki.org/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @author Brion Vibber <brion at pobox.com> + * @package MediaWiki + * @subpackage maintenance + */ + +$options = array( 'dry-run' ); + +require_once( 'commandLine.inc' ); +require_once( 'FiveUpgrade.inc' ); + +class CapsCleanup extends FiveUpgrade { + function CapsCleanup( $dryrun = false, $namespace=0 ) { + parent::FiveUpgrade(); + + $this->maxLag = 10; # if slaves are lagged more than 10 secs, wait + $this->dryrun = $dryrun; + $this->namespace = intval( $namespace ); + } + + function cleanup() { + global $wgCapitalLinks; + if( $wgCapitalLinks ) { + echo "\$wgCapitalLinks is on -- no need for caps links cleanup.\n"; + return false; + } + + $this->runTable( 'page', 'WHERE page_namespace=' . 
$this->namespace, + array( &$this, 'processPage' ) ); + } + + function init( $count, $table ) { + $this->processed = 0; + $this->updated = 0; + $this->count = $count; + $this->startTime = wfTime(); + $this->table = $table; + } + + function progress( $updated ) { + $this->updated += $updated; + $this->processed++; + if( $this->processed % 100 != 0 ) { + return; + } + $portion = $this->processed / $this->count; + $updateRate = $this->updated / $this->processed; + + $now = wfTime(); + $delta = $now - $this->startTime; + $estimatedTotalTime = $delta / $portion; + $eta = $this->startTime + $estimatedTotalTime; + + printf( "%s: %6.2f%% done on %s; ETA %s [%d/%d] %.2f/sec <%.2f%% updated>\n", + wfTimestamp( TS_DB, intval( $now ) ), + $portion * 100.0, + $this->table, + wfTimestamp( TS_DB, intval( $eta ) ), + $this->processed, + $this->count, + $this->processed / $delta, + $updateRate * 100.0 ); + flush(); + } + + function runTable( $table, $where, $callback ) { + $fname = 'CapsCleanup::buildTable'; + + $count = $this->dbw->selectField( $table, 'count(*)', '', $fname ); + $this->init( $count, 'page' ); + $this->log( "Processing $table..." ); + + $tableName = $this->dbr->tableName( $table ); + $sql = "SELECT * FROM $tableName $where"; + $result = $this->dbr->query( $sql, $fname ); + + while( $row = $this->dbr->fetchObject( $result ) ) { + $updated = call_user_func( $callback, $row ); + } + $this->log( "Finished $table... $this->updated of $this->processed rows updated" ); + $this->dbr->freeResult( $result ); + } + + function processPage( $row ) { + global $wgContLang; + + $current = Title::makeTitle( $row->page_namespace, $row->page_title ); + $display = $current->getPrefixedText(); + $upper = $row->page_title; + $lower = $wgContLang->lcfirst( $row->page_title ); + if( $upper == $lower ) { + $this->log( "\"$display\" already lowercase." 
); + return $this->progress( 0 ); + } + + $target = Title::makeTitle( $row->page_namespace, $lower ); + $targetDisplay = $target->getPrefixedText(); + if( $target->exists() ) { + $this->log( "\"$display\" skipped; \"$targetDisplay\" already exists" ); + return $this->progress( 0 ); + } + + if( $this->dryrun ) { + $this->log( "\"$display\" -> \"$targetDisplay\": DRY RUN, NOT MOVED" ); + $ok = true; + } else { + $ok = $current->moveTo( $target, false, 'Converting page titles to lowercase' ); + $this->log( "\"$display\" -> \"$targetDisplay\": $ok" ); + } + if( $ok === true ) { + $this->progress( 1 ); + + if( $row->page_namespace == $this->namespace ) { + $talk = $target->getTalkPage(); + $xrow = $row; + $row->page_namespace = $talk->getNamespace(); + if( $talk->exists() ) { + return $this->processPage( $row ); + } + } + } else { + $this->progress( 0 ); + } + } + +} + +$wgUser->setName( 'Conversion script' ); +$ns = isset( $options['namespace'] ) ? $options['namespace'] : 0; +$caps = new CapsCleanup( isset( $options['dry-run'] ), $ns ); +$caps->cleanup(); + +?> diff --git a/maintenance/cleanupDupes.inc b/maintenance/cleanupDupes.inc new file mode 100644 index 00000000..18daab08 --- /dev/null +++ b/maintenance/cleanupDupes.inc @@ -0,0 +1,131 @@ +<?php +# Copyright (C) 2004 Brion Vibber <brion@pobox.com> +# http://www.mediawiki.org/ +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +# http://www.gnu.org/copyleft/gpl.html + +/** + * If on the old non-unique indexes, check the cur table for duplicate + * entries and remove them... + * + * @package MediaWiki + * @subpackage Maintenance + */ + +function fixDupes( $fixthem = false) { + $dbw =& wfGetDB( DB_MASTER ); + $cur = $dbw->tableName( 'cur' ); + $old = $dbw->tableName( 'old' ); + $dbw->query( "LOCK TABLES $cur WRITE, $old WRITE" ); + echo "Checking for duplicate cur table entries... (this may take a while on a large wiki)\n"; + $res = $dbw->query( <<<END +SELECT cur_namespace,cur_title,count(*) as c,min(cur_id) as id + FROM $cur + GROUP BY cur_namespace,cur_title +HAVING c > 1 +END + ); + $n = $dbw->numRows( $res ); + echo "Found $n titles with duplicate entries.\n"; + if( $n > 0 ) { + if( $fixthem ) { + echo "Correcting...\n"; + } else { + echo "Just a demo...\n"; + } + while( $row = $dbw->fetchObject( $res ) ) { + $ns = intval( $row->cur_namespace ); + $title = $dbw->addQuotes( $row->cur_title ); + + # Get the first responding ID; that'll be the one we keep. + $id = $dbw->selectField( 'cur', 'cur_id', array( + 'cur_namespace' => $row->cur_namespace, + 'cur_title' => $row->cur_title ) ); + + echo "$ns:$row->cur_title (canonical ID $id)\n"; + if( $id != $row->id ) { + echo " ** minimum ID $row->id; "; + $timeMin = $dbw->selectField( 'cur', 'cur_timestamp', array( + 'cur_id' => $row->id ) ); + $timeFirst = $dbw->selectField( 'cur', 'cur_timestamp', array( + 'cur_id' => $id ) ); + if( $timeMin == $timeFirst ) { + echo "timestamps match at $timeFirst; ok\n"; + } else { + echo "timestamps don't match! 
min: $timeMin, first: $timeFirst; "; + if( $timeMin > $timeFirst ) { + $id = $row->id; + echo "keeping minimum: $id\n"; + } else { + echo "keeping first: $id\n"; + } + } + } + + if( $fixthem ) { + $dbw->query( <<<END +INSERT + INTO $old + (old_namespace, old_title, old_text, + old_comment, old_user, old_user_text, + old_timestamp, old_minor_edit, old_flags, + inverse_timestamp) +SELECT cur_namespace, cur_title, cur_text, + cur_comment, cur_user, cur_user_text, + cur_timestamp, cur_minor_edit, '', + inverse_timestamp + FROM $cur + WHERE cur_namespace=$ns + AND cur_title=$title + AND cur_id != $id +END + ); + $dbw->query( <<<END +DELETE + FROM $cur + WHERE cur_namespace=$ns + AND cur_title=$title + AND cur_id != $id +END + ); + } + } + } + $dbw->query( 'UNLOCK TABLES' ); + if( $fixthem ) { + echo "Done.\n"; + } else { + echo "Run again with --fix option to delete the duplicates.\n"; + } +} + +function checkDupes( $fixthem = false, $indexonly = false ) { + global $wgDBname; + $dbw =& wfGetDB( DB_MASTER ); + if( $dbw->indexExists( 'cur', 'name_title' ) && + $dbw->indexUnique( 'cur', 'name_title' ) ) { + echo "$wgDBname: cur table has the current unique index; no duplicate entries.\n"; + } elseif( $dbw->indexExists( 'cur', 'name_title_dup_prevention' ) ) { + echo "$wgDBname: cur table has a temporary name_title_dup_prevention unique index; no duplicate entries.\n"; + } else { + echo "$wgDBname: cur table has the old non-unique index and may have duplicate entries.\n"; + if( !$indexonly ) { + fixDupes( $fixthem ); + } + } +} + +?>
\ No newline at end of file diff --git a/maintenance/cleanupDupes.php b/maintenance/cleanupDupes.php new file mode 100644 index 00000000..3aea2304 --- /dev/null +++ b/maintenance/cleanupDupes.php @@ -0,0 +1,37 @@ +<?php +# Copyright (C) 2004 Brion Vibber <brion@pobox.com> +# http://www.mediawiki.org/ +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +# http://www.gnu.org/copyleft/gpl.html + +/** + * If on the old non-unique indexes, check the cur table for duplicate + * entries and remove them... + * + * @package MediaWiki + * @subpackage Maintenance + */ + +$options = array( 'fix', 'index' ); + +/** */ +require_once( 'commandLine.inc' ); +require_once( 'cleanupDupes.inc' ); +$wgTitle = Title::newFromText( 'Dupe cur entry cleanup script' ); + +checkDupes( isset( $options['fix'] ), isset( $options['index'] ) ); + +?> diff --git a/maintenance/cleanupSpam.php b/maintenance/cleanupSpam.php new file mode 100644 index 00000000..65d6bc4d --- /dev/null +++ b/maintenance/cleanupSpam.php @@ -0,0 +1,112 @@ +<?php + +require_once( 'commandLine.inc' ); +require_once( "$IP/includes/LinkFilter.php" ); + +function cleanupArticle( $id, $domain ) { + $title = Title::newFromID( $id ); + if ( !$title ) { + print "Internal error: no page for ID $id\n"; + return; + } + + print $title->getPrefixedDBkey() . 
" ..."; + $rev = Revision::newFromTitle( $title ); + $reverted = false; + $revId = $rev->getId(); + $currentRevId = $revId; + $regex = LinkFilter::makeRegex( $domain ); + + while ( $rev && preg_match( $regex, $rev->getText() ) ) { + # Revision::getPrevious can't be used in this way before MW 1.6 (Revision.php 1.26) + #$rev = $rev->getPrevious(); + $revId = $title->getPreviousRevisionID( $revId ); + if ( $revId ) { + $rev = Revision::newFromTitle( $title, $revId ); + } else { + $rev = false; + } + } + if ( $revId == $currentRevId ) { + // The regex didn't match the current article text + // This happens e.g. when a link comes from a template rather than the page itself + print "False match\n"; + } else { + $dbw =& wfGetDB( DB_MASTER ); + $dbw->immediateBegin(); + if ( !$rev ) { + // Didn't find a non-spammy revision, blank the page + print "blanking\n"; + $article = new Article( $title ); + $article->updateArticle( '', wfMsg( 'spam_blanking', $domain ), + false, false ); + + } else { + // Revert to this revision + print "reverting\n"; + $article = new Article( $title ); + $article->updateArticle( $rev->getText(), wfMsg( 'spam_reverting', $domain ), false, false ); + } + $dbw->immediateCommit(); + wfDoUpdates(); + } +} +//------------------------------------------------------------------------------ + + + + +$username = wfMsg( 'spambot_username' ); +$fname = $username; +$wgUser = User::newFromName( $username ); +// Create the user if necessary +if ( !$wgUser->getID() ) { + $wgUser->addToDatabase(); +} + +if ( !isset( $args[0] ) ) { + print "Usage: php cleanupSpam.php <hostname>\n"; + exit(1); +} +$spec = $args[0]; +$like = LinkFilter::makeLike( $spec ); +if ( !$like ) { + print "Not a valid hostname specification: $spec\n"; + exit(1); +} + +$dbr =& wfGetDB( DB_SLAVE ); + +if ( $options['all'] ) { + // Clean up spam on all wikis + $dbr =& wfGetDB( DB_SLAVE ); + print "Finding spam on " . count($wgLocalDatabases) . 
" wikis\n"; + $found = false; + foreach ( $wgLocalDatabases as $db ) { + $count = $dbr->selectField( "`$db`.externallinks", 'COUNT(*)', + array( 'el_index LIKE ' . $dbr->addQuotes( $like ) ), $fname ); + if ( $count ) { + $found = true; + passthru( "php cleanupSpam.php $db $spec | sed s/^/$db: /" ); + } + } + if ( $found ) { + print "All done\n"; + } else { + print "None found\n"; + } +} else { + // Clean up spam on this wiki + $res = $dbr->select( 'externallinks', array( 'DISTINCT el_from' ), + array( 'el_index LIKE ' . $dbr->addQuotes( $like ) ), $fname ); + $count = $dbr->numRows( $res ); + print "Found $count articles containing $spec\n"; + while ( $row = $dbr->fetchObject( $res ) ) { + cleanupArticle( $row->el_from, $spec ); + } + if ( $count ) { + print "Done\n"; + } +} + +?> diff --git a/maintenance/cleanupTitles.php b/maintenance/cleanupTitles.php new file mode 100644 index 00000000..930072de --- /dev/null +++ b/maintenance/cleanupTitles.php @@ -0,0 +1,210 @@ +<?php +/* + * Script to clean up broken, unparseable titles. + * + * Usage: php cleanupTitles.php [--dry-run] + * Options: + * --dry-run don't actually try moving them + * + * Copyright (C) 2005 Brion Vibber <brion@pobox.com> + * http://www.mediawiki.org/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + * + * @author Brion Vibber <brion at pobox.com> + * @package MediaWiki + * @subpackage maintenance + */ + +$options = array( 'dry-run' ); + +require_once( 'commandLine.inc' ); +require_once( 'FiveUpgrade.inc' ); + +class TitleCleanup extends FiveUpgrade { + function TitleCleanup( $dryrun = false ) { + parent::FiveUpgrade(); + + $this->maxLag = 10; # if slaves are lagged more than 10 secs, wait + $this->dryrun = $dryrun; + } + + function cleanup() { + $this->runTable( 'page', + '', //'WHERE page_namespace=0', + array( &$this, 'processPage' ) ); + } + + function init( $count, $table ) { + $this->processed = 0; + $this->updated = 0; + $this->count = $count; + $this->startTime = wfTime(); + $this->table = $table; + } + + function progress( $updated ) { + $this->updated += $updated; + $this->processed++; + if( $this->processed % 100 != 0 ) { + return; + } + $portion = $this->processed / $this->count; + $updateRate = $this->updated / $this->processed; + + $now = wfTime(); + $delta = $now - $this->startTime; + $estimatedTotalTime = $delta / $portion; + $eta = $this->startTime + $estimatedTotalTime; + + global $wgDBname; + printf( "%s %s: %6.2f%% done on %s; ETA %s [%d/%d] %.2f/sec <%.2f%% updated>\n", + $wgDBname, + wfTimestamp( TS_DB, intval( $now ) ), + $portion * 100.0, + $this->table, + wfTimestamp( TS_DB, intval( $eta ) ), + $this->processed, + $this->count, + $this->processed / $delta, + $updateRate * 100.0 ); + flush(); + } + + function runTable( $table, $where, $callback ) { + $fname = 'CapsCleanup::buildTable'; + + $count = $this->dbw->selectField( $table, 'count(*)', '', $fname ); + $this->init( $count, 'page' ); + $this->log( "Processing $table..." 
); + + $tableName = $this->dbr->tableName( $table ); + $sql = "SELECT * FROM $tableName $where"; + $result = $this->dbr->query( $sql, $fname ); + + while( $row = $this->dbr->fetchObject( $result ) ) { + $updated = call_user_func( $callback, $row ); + } + $this->log( "Finished $table... $this->updated of $this->processed rows updated" ); + $this->dbr->freeResult( $result ); + } + + function processPage( $row ) { + global $wgContLang; + + $current = Title::makeTitle( $row->page_namespace, $row->page_title ); + $display = $current->getPrefixedText(); + + $verified = UtfNormal::cleanUp( $display ); + + $title = Title::newFromText( $verified ); + + if( is_null( $title ) ) { + $this->log( "page $row->page_id ($display) is illegal." ); + $this->moveIllegalPage( $row ); + return $this->progress( 1 ); + } + + if( !$title->equals( $current ) ) { + $this->log( "page $row->page_id ($display) doesn't match self." ); + $this->moveInconsistentPage( $row, $title ); + return $this->progress( 1 ); + } + + $this->progress( 0 ); + } + + function moveIllegalPage( $row ) { + $legal = 'A-Za-z0-9_/\\\\-'; + $legalized = preg_replace_callback( "!([^$legal])!", + array( &$this, 'hexChar' ), + $row->page_title ); + if( $legalized == '.' ) $legalized = '(dot)'; + if( $legalized == '_' ) $legalized = '(space)'; + $legalized = 'Broken/' . $legalized; + + $title = Title::newFromText( $legalized ); + if( is_null( $title ) ) { + $clean = 'Broken/id:' . $row->page_id; + $this->log( "Couldn't legalize; form '$legalized' still invalid; using '$clean'" ); + $title = Title::newFromText( $clean ); + } elseif( $title->exists() ) { + $clean = 'Broken/id:' . 
$row->page_id; + $this->log( "Legalized for '$legalized' exists; using '$clean'" ); + $title = Title::newFromText( $clean ); + } + + $dest = $title->getDbKey(); + if( $this->dryrun ) { + $this->log( "DRY RUN: would rename $row->page_id ($row->page_namespace,'$row->page_title') to ($row->page_namespace,'$dest')" ); + } else { + $this->log( "renaming $row->page_id ($row->page_namespace,'$row->page_title') to ($row->page_namespace,'$dest')" ); + $dbw =& wfGetDB( DB_MASTER ); + $dbw->update( 'page', + array( 'page_title' => $dest ), + array( 'page_id' => $row->page_id ), + 'cleanupTitles::moveInconsistentPage' ); + } + } + + function moveInconsistentPage( $row, $title ) { + if( $title->exists() || $title->getInterwiki() ) { + if( $title->getInterwiki() ) { + $prior = $title->getPrefixedDbKey(); + } else { + $prior = $title->getDbKey(); + } + $clean = 'Broken/' . $prior; + $verified = Title::makeTitleSafe( $row->page_namespace, $clean ); + if( $verified->exists() ) { + $blah = "Broken/id:" . 
$row->page_id; + $this->log( "Couldn't legalize; form '$clean' exists; using '$blah'" ); + $verified = Title::makeTitleSafe( $row->page_namespace, $blah ); + } + $title = $verified; + } + if( is_null( $title ) ) { + wfDie( "Something awry; empty title.\n" ); + } + $ns = $title->getNamespace(); + $dest = $title->getDbKey(); + if( $this->dryrun ) { + $this->log( "DRY RUN: would rename $row->page_id ($row->page_namespace,'$row->page_title') to ($row->page_namespace,'$dest')" ); + } else { + $this->log( "renaming $row->page_id ($row->page_namespace,'$row->page_title') to ($ns,'$dest')" ); + $dbw =& wfGetDB( DB_MASTER ); + $dbw->update( 'page', + array( + 'page_namespace' => $ns, + 'page_title' => $dest + ), + array( 'page_id' => $row->page_id ), + 'cleanupTitles::moveInconsistentPage' ); + $linkCache =& LinkCache::singleton(); + $linkCache->clear(); + } + } + + function hexChar( $matches ) { + return sprintf( "\\x%02x", ord( $matches[1] ) ); + } +} + +$wgUser->setName( 'Conversion script' ); +$caps = new TitleCleanup( isset( $options['dry-run'] ) ); +$caps->cleanup(); + +?> diff --git a/maintenance/cleanupWatchlist.php b/maintenance/cleanupWatchlist.php new file mode 100644 index 00000000..d2925db3 --- /dev/null +++ b/maintenance/cleanupWatchlist.php @@ -0,0 +1,141 @@ +<?php +/* + * Script to remove broken, unparseable titles in the Watchlist. + * + * Usage: php cleanupWatchlist.php [--fix] + * Options: + * --fix Actually remove entries; without will only report. + * + * Copyright (C) 2005,2006 Brion Vibber <brion@pobox.com> + * http://www.mediawiki.org/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @author Brion Vibber <brion at pobox.com> + * @package MediaWiki + * @subpackage maintenance + */ + +$options = array( 'fix' ); + +require_once( 'commandLine.inc' ); +require_once( 'FiveUpgrade.inc' ); + +class WatchlistCleanup extends FiveUpgrade { + function WatchlistCleanup( $dryrun = false ) { + parent::FiveUpgrade(); + + $this->maxLag = 10; # if slaves are lagged more than 10 secs, wait + $this->dryrun = $dryrun; + } + + function cleanup() { + $this->runTable( 'watchlist', + '', + array( &$this, 'processEntry' ) ); + } + + function init( $count, $table ) { + $this->processed = 0; + $this->updated = 0; + $this->count = $count; + $this->startTime = wfTime(); + $this->table = $table; + } + + function progress( $updated ) { + $this->updated += $updated; + $this->processed++; + if( $this->processed % 100 != 0 ) { + return; + } + $portion = $this->processed / $this->count; + $updateRate = $this->updated / $this->processed; + + $now = wfTime(); + $delta = $now - $this->startTime; + $estimatedTotalTime = $delta / $portion; + $eta = $this->startTime + $estimatedTotalTime; + + global $wgDBname; + printf( "%s %s: %6.2f%% done on %s; ETA %s [%d/%d] %.2f/sec <%.2f%% updated>\n", + $wgDBname, + wfTimestamp( TS_DB, intval( $now ) ), + $portion * 100.0, + $this->table, + wfTimestamp( TS_DB, intval( $eta ) ), + $this->processed, + $this->count, + $this->processed / $delta, + $updateRate * 100.0 ); + flush(); + } + + function runTable( $table, $where, $callback ) { 
+ $fname = 'WatchlistCleanup::runTable'; + + $count = $this->dbw->selectField( $table, 'count(*)', '', $fname ); + $this->init( $count, 'watchlist' ); + $this->log( "Processing $table..." ); + + $tableName = $this->dbr->tableName( $table ); + $sql = "SELECT * FROM $tableName $where"; + $result = $this->dbr->query( $sql, $fname ); + + while( $row = $this->dbr->fetchObject( $result ) ) { + $updated = call_user_func( $callback, $row ); + } + $this->log( "Finished $table... $this->updated of $this->processed rows updated" ); + $this->dbr->freeResult( $result ); + } + + function processEntry( $row ) { + global $wgContLang; + + $current = Title::makeTitle( $row->wl_namespace, $row->wl_title ); + $display = $current->getPrefixedText(); + + $verified = UtfNormal::cleanUp( $display ); + + $title = Title::newFromText( $verified ); + + if( $row->wl_user == 0 || is_null( $title ) || !$title->equals( $current ) ) { + $this->log( "invalid watch by {$row->wl_user} for ({$row->wl_namespace}, \"{$row->wl_title}\")" ); + $this->removeWatch( $row ); + return $this->progress( 1 ); + } + + $this->progress( 0 ); + } + + function removeWatch( $row ) { + if( !$this->dryrun) { + $dbw =& wfGetDB( DB_MASTER ); + $dbw->delete( 'watchlist', array( + 'wl_user' => $row->wl_user, + 'wl_namespace' => $row->wl_namespace, + 'wl_title' => $row->wl_title ), + 'WatchlistCleanup::removeWatch' ); + $this->log( '- removed' ); + } + } +} + +$wgUser->setName( 'Conversion script' ); +$caps = new WatchlistCleanup( !isset( $options['fix'] ) ); +$caps->cleanup(); + +?> diff --git a/maintenance/clear_interwiki_cache.php b/maintenance/clear_interwiki_cache.php new file mode 100644 index 00000000..97869728 --- /dev/null +++ b/maintenance/clear_interwiki_cache.php @@ -0,0 +1,26 @@ +<?php +/** + * This script is used to clear the interwiki links for ALL languages in + * memcached. 
+ * @package MediaWiki + * @subpackage Maintenance + */ + +/** */ +require_once('commandLine.inc'); + +$dbr =& wfGetDB( DB_SLAVE ); +$res = $dbr->select( 'interwiki', array( 'iw_prefix' ), false ); +$prefixes = array(); +while ( $row = $dbr->fetchObject( $res ) ) { + $prefixes[] = $row->iw_prefix; +} + +foreach ( $wgLocalDatabases as $db ) { + print "$db "; + foreach ( $prefixes as $prefix ) { + $wgMemc->delete("$db:interwiki:$prefix"); + } +} +print "\n"; +?> diff --git a/maintenance/clear_stats.php b/maintenance/clear_stats.php new file mode 100644 index 00000000..00cfd0ce --- /dev/null +++ b/maintenance/clear_stats.php @@ -0,0 +1,31 @@ +<?php +require_once('commandLine.inc'); + +foreach ( $wgLocalDatabases as $db ) { + noisyDelete("$db:stats:request_with_session"); + noisyDelete("$db:stats:request_without_session"); + noisyDelete("$db:stats:pcache_hit"); + noisyDelete("$db:stats:pcache_miss_invalid"); + noisyDelete("$db:stats:pcache_miss_expired"); + noisyDelete("$db:stats:pcache_miss_absent"); + noisyDelete("$db:stats:pcache_miss_stub"); + noisyDelete("$db:stats:image_cache_hit"); + noisyDelete("$db:stats:image_cache_miss"); + noisyDelete("$db:stats:image_cache_update"); + noisyDelete("$db:stats:diff_cache_hit"); + noisyDelete("$db:stats:diff_cache_miss"); + noisyDelete("$db:stats:diff_uncacheable"); +} + +function noisyDelete( $key ) { + global $wgMemc; + /* + print "$key "; + if ( $wgMemc->delete($key) ) { + print "deleted\n"; + } else { + print "FAILED\n"; + }*/ + $wgMemc->delete($key); +} +?> diff --git a/maintenance/commandLine.inc b/maintenance/commandLine.inc new file mode 100644 index 00000000..2bb5389e --- /dev/null +++ b/maintenance/commandLine.inc @@ -0,0 +1,232 @@ +<?php +/** + * @todo document + * @package MediaWiki + * @subpackage Maintenance + */ + +$wgRequestTime = microtime(true); + +/** */ +# Abort if called from a web server +if ( isset( $_SERVER ) && array_key_exists( 'REQUEST_METHOD', $_SERVER ) ) { + print "This script must be run from the 
command line\n"; + exit(); +} + +define('MEDIAWIKI',true); + +# Process command line arguments +# $options becomes an array with keys set to the option names +# $optionsWithArgs is an array of GNU-style options that take an argument. The arguments are returned +# in the values of $options. +# $args becomes a zero-based array containing the non-option arguments + +if ( !isset( $optionsWithArgs ) ) { + $optionsWithArgs = array(); +} +$optionsWithArgs[] = 'conf'; # For specifying the location of LocalSettings.php + +$self = array_shift( $argv ); +$self = __FILE__; +$IP = realpath( dirname( $self ) . '/..' ); +#chdir( $IP ); + +$options = array(); +$args = array(); + + +# Parse arguments + +for( $arg = reset( $argv ); $arg !== false; $arg = next( $argv ) ) { + if ( $arg == '--' ) { + # End of options, remainder should be considered arguments + $arg = next( $argv ); + while( $arg !== false ) { + $args[] = $arg; + $arg = next( $argv ); + } + break; + } elseif ( substr( $arg, 0, 2 ) == '--' ) { + # Long options + $option = substr( $arg, 2 ); + if ( in_array( $option, $optionsWithArgs ) ) { + $param = next( $argv ); + if ( $param === false ) { + echo "$arg needs an value after it\n"; + die( -1 ); + } + $options[$option] = $param; + } else { + $bits = explode( '=', $option, 2 ); + if( count( $bits ) > 1 ) { + $option = $bits[0]; + $param = $bits[1]; + } else { + $param = 1; + } + $options[$option] = $param; + } + } elseif ( substr( $arg, 0, 1 ) == '-' ) { + # Short options + for ( $p=1; $p<strlen( $arg ); $p++ ) { + $option = $arg{$p}; + if ( in_array( $option, $optionsWithArgs ) ) { + $param = next( $argv ); + if ( $param === false ) { + echo "$arg needs an value after it\n"; + die( -1 ); + } + $options[$option] = $param; + } else { + $options[$option] = 1; + } + } + } else { + $args[] = $arg; + } +} + + +# General initialisation + +$wgCommandLineMode = true; +# Turn off output buffering if it's on +@ob_end_flush(); +$sep = PATH_SEPARATOR; + +if (!isset( $wgUseNormalUser ) 
) { + $wgUseNormalUser = false; +} + +if ( file_exists( '/home/wikipedia/common/langlist' ) ) { + $wgWikiFarm = true; + $cluster = trim( file_get_contents( '/etc/cluster' ) ); + require_once( "$IP/includes/SiteConfiguration.php" ); + + # Get $wgConf + require( "$IP/wgConf.php" ); + + if ( empty( $wgNoDBParam ) ) { + # Check if we were passed a db name + $db = array_shift( $args ); + list( $site, $lang ) = $wgConf->siteFromDB( $db ); + + # If not, work out the language and site the old way + if ( is_null( $site ) || is_null( $lang ) ) { + if ( !$db ) { + $lang = 'aa'; + } else { + $lang = $db; + } + if ( isset( $args[0] ) ) { + $site = array_shift( $args ); + } else { + $site = 'wikipedia'; + } + } + } else { + $lang = 'aa'; + $site = 'wikipedia'; + } + + # This is for the IRC scripts, which now run as the apache user + # The apache user doesn't have access to the wikiadmin_pass command + if ( $_ENV['USER'] == 'apache' ) { + $wgUseNormalUser = true; + } + + putenv( 'wikilang='.$lang); + + $DP = $IP; + ini_set( 'include_path', ".:$IP:$IP/includes:$IP/languages:$IP/maintenance" ); + + require_once( $IP.'/includes/ProfilerStub.php' ); + require_once( $IP.'/includes/Defines.php' ); + require_once( $IP.'/CommonSettings.php' ); + + $bin = '/home/wikipedia/bin'; + if ( $wgUseRootUser ) { + $wgDBuser = $wgDBadminuser = 'root'; + $wgDBpassword = $wgDBadminpassword = trim(`$bin/mysql_root_pass`); + } elseif ( !$wgUseNormalUser ) { + $wgDBuser = $wgDBadminuser = 'wikiadmin'; + $wgDBpassword = $wgDBadminpassword = trim(`$bin/wikiadmin_pass`); + } +} else { + $wgWikiFarm = false; + if ( isset( $options['conf'] ) ) { + $settingsFile = $options['conf']; + } else { + $settingsFile = "$IP/LocalSettings.php"; + } + + if ( ! is_readable( $settingsFile ) ) { + print "A copy of your installation's LocalSettings.php\n" . 
+			"must exist in the source directory.\n";
+		exit( 1 );
+	}
+	$wgCommandLineMode = true;
+	$DP = $IP;
+	require_once( $IP.'/includes/ProfilerStub.php' );
+	require_once( $IP.'/includes/Defines.php' );
+	require_once( $settingsFile );
+	ini_set( 'include_path', ".$sep$IP$sep$IP/includes$sep$IP/languages$sep$IP/maintenance" );
+
+	if ( is_readable( $IP.'/AdminSettings.php' ) ) {
+		require_once( $IP.'/AdminSettings.php' );
+	}
+}
+
+# Turn off output buffering again, it might have been turned on in the settings files
+@ob_end_flush();
+# Same with these
+$wgCommandLineMode = true;
+
+if ( empty( $wgUseNormalUser ) && isset( $wgDBadminuser ) ) {
+	$wgDBuser = $wgDBadminuser;
+	$wgDBpassword = $wgDBadminpassword;
+
+	if( $wgDBservers ) {
+		foreach ( $wgDBservers as $i => $server ) {
+			$wgDBservers[$i]['user'] = $wgDBuser;
+			$wgDBservers[$i]['password'] = $wgDBpassword;
+		}
+	}
+}
+
+if ( defined( 'MW_CMDLINE_CALLBACK' ) ) {
+	$fn = MW_CMDLINE_CALLBACK;
+	$fn();
+}
+
+ini_set( 'memory_limit', -1 );
+
+require_once( 'Setup.php' );
+require_once( 'install-utils.inc' );
+$wgTitle = Title::newFromText( 'Command line script' );
+set_time_limit(0);
+
+// --------------------------------------------------------------------
+// Functions
+// --------------------------------------------------------------------
+
+/**
+ * Poll $wgLoadBalancer->getMaxLag() and sleep until the lag it reports is
+ * no greater than $maxLag. Returns immediately when $maxLag is empty or 0.
+ *
+ * @param int $maxLag Lag threshold in seconds; also the sleep time between polls
+ */
+function wfWaitForSlaves( $maxLag ) {
+	global $wgLoadBalancer;
+
+	if ( !$maxLag ) {
+		return;
+	}
+
+	list( $host, $lag ) = $wgLoadBalancer->getMaxLag();
+	while ( $lag > $maxLag ) {
+		# Show a host name in the message when the reverse lookup succeeds
+		$resolved = @gethostbyaddr( $host );
+		$host = ( $resolved === false ) ? $host : $resolved;
+		print "Waiting for $host (lagged $lag seconds)...\n";
+		sleep($maxLag);
+		list( $host, $lag ) = $wgLoadBalancer->getMaxLag();
+	}
+}
+
+
+
+?>
diff --git a/maintenance/convertLinks.inc b/maintenance/convertLinks.inc
new file mode 100644
index 00000000..f0d2c439
--- /dev/null
+++ b/maintenance/convertLinks.inc
@@ -0,0 +1,220 @@
+<?php
+/**
+ * @todo document
+ * @package MediaWiki
+ * @subpackage Maintenance
+ */
+
+/**
*/
+/**
+ * Convert the links table from the old schema, where l_from holds a page
+ * title, to the ID-to-ID schema.
+ *
+ * Steps, as performed below:
+ *  - return early on PostgreSQL, or when l_from is already an int column;
+ *  - load a "prefixed title => cur_id" map from the cur table;
+ *  - copy the links rows into links_temp in chunks of
+ *    $linksConvInsertInterval rows, replacing l_from by the mapped id and
+ *    counting rows whose title is not in the map as bad links;
+ *  - when $overwriteLinksTable is set, rename links to links_backup and
+ *    links_temp to links.
+ *
+ * Progress is printed to stdout; timing data goes through performanceLog().
+ */
+function convertLinks() {
+	global $wgDBtype;
+	if( $wgDBtype == 'PostgreSQL' ) {
+		print "Links table already ok on PostgreSQL.\n";
+		return;
+	}
+
+	print "Converting links table to ID-ID...\n";
+
+	global $wgLang, $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname;
+	global $noKeys, $logPerformance, $fh;
+
+	$numRows = $tuplesAdded = $numBadLinks = $curRowsRead = 0; #counters etc
+	$totalTuplesInserted = 0; # total tuples INSERTed into links_temp
+
+	$reportCurReadProgress = true; #whether or not to give progress reports while reading IDs from cur table
+	$curReadReportInterval = 1000; #number of rows between progress reports
+
+	$reportLinksConvProgress = true; #whether or not to give progress reports during conversion
+	$linksConvInsertInterval = 1000; #number of rows per INSERT
+
+	$initialRowOffset = 0;
+	#$finalRowOffset = 0; # not used yet; highest row number from links table to process
+
+	# Overwrite the old links table with the new one. If this is set to false,
+	# the new table will be left at links_temp.
+	$overwriteLinksTable = true;
+
+	# Don't create keys, and so allow duplicates in the new links table.
+	# This gives a huge speed improvement for very large links tables which are MyISAM. (What about InnoDB?)
+	$noKeys = false;
+
+
+	$logPerformance = false; # output performance data to a file
+	$perfLogFilename = "convLinksPerf.txt";
+	#--------------------------------------------------------------------
+
+	$dbw =& wfGetDB( DB_MASTER );
+	# Defines $cur, $links, $links_temp and $links_backup as table names
+	extract( $dbw->tableNames( 'cur', 'links', 'links_temp', 'links_backup' ) );
+
+	$res = $dbw->query( "SELECT l_from FROM $links LIMIT 1" );
+	if ( $dbw->fieldType( $res, 0 ) == "int" ) {
+		print "Schema already converted\n";
+		return;
+	}
+
+	$res = $dbw->query( "SELECT COUNT(*) AS count FROM $links" );
+	$row = $dbw->fetchObject($res);
+	$numRows = $row->count;
+	$dbw->freeResult( $res );
+
+	if ( $numRows == 0 ) {
+		print "Updating schema (no rows to convert)...\n";
+		createTempTable();
+	} else {
+		if ( $logPerformance ) { $fh = fopen ( $perfLogFilename, "w" ); }
+		$baseTime = $startTime = getMicroTime();
+		# Create a title -> cur_id map
+		print "Loading IDs from $cur table...\n";
+		performanceLog ( "Reading $numRows rows from cur table...\n" );
+		performanceLog ( "rows read vs seconds elapsed:\n" );
+
+		$dbw->bufferResults( false );
+		$res = $dbw->query( "SELECT cur_namespace,cur_title,cur_id FROM $cur" );
+		$ids = array();
+
+		while ( $row = $dbw->fetchObject( $res ) ) {
+			$title = $row->cur_title;
+			if ( $row->cur_namespace ) {
+				$title = $wgLang->getNsText( $row->cur_namespace ) . ":$title";
+			}
+			$ids[$title] = $row->cur_id;
+			$curRowsRead++;
+			if ($reportCurReadProgress) {
+				if (($curRowsRead % $curReadReportInterval) == 0) {
+					performanceLog( $curRowsRead . " " . (getMicroTime() - $baseTime) . "\n" );
+					print "\t$curRowsRead rows of $cur table read.\n";
+				}
+			}
+		}
+		$dbw->freeResult( $res );
+		$dbw->bufferResults( true );
+		print "Finished loading IDs.\n\n";
+		performanceLog( "Took " . (getMicroTime() - $baseTime) . " seconds to load IDs.\n\n" );
+	#--------------------------------------------------------------------
+
+		# Now, step through the links table (in chunks of $linksConvInsertInterval rows),
+		# convert, and write to the new table.
+		createTempTable();
+		performanceLog( "Resetting timer.\n\n" );
+		$baseTime = getMicroTime();
+		print "Processing $numRows rows from $links table...\n";
+		performanceLog( "Processing $numRows rows from $links table...\n" );
+		performanceLog( "rows inserted vs seconds elapsed:\n" );
+
+		for ($rowOffset = $initialRowOffset; $rowOffset < $numRows; $rowOffset += $linksConvInsertInterval) {
+			$sqlRead = "SELECT * FROM $links ";
+			$sqlRead = $dbw->limitResult($sqlRead, $linksConvInsertInterval,$rowOffset);
+			$res = $dbw->query($sqlRead);
+			if ( $noKeys ) {
+				$sqlWrite = array("INSERT INTO $links_temp (l_from,l_to) VALUES ");
+			} else {
+				$sqlWrite = array("INSERT IGNORE INTO $links_temp (l_from,l_to) VALUES ");
+			}
+
+			$tuplesAdded = 0; # no tuples added to INSERT yet
+			while ( $row = $dbw->fetchObject($res) ) {
+				$fromTitle = $row->l_from;
+				if ( array_key_exists( $fromTitle, $ids ) ) { # valid title
+					$from = $ids[$fromTitle];
+					$to = $row->l_to;
+					if ( $tuplesAdded != 0 ) {
+						$sqlWrite[] = ",";
+					}
+					$sqlWrite[] = "($from,$to)";
+					$tuplesAdded++;
+				} else { # invalid title
+					$numBadLinks++;
+				}
+			}
+			$dbw->freeResult($res);
+			#print "rowOffset: $rowOffset\ttuplesAdded: $tuplesAdded\tnumBadLinks: $numBadLinks\n";
+			if ( $tuplesAdded != 0 ) {
+				if ($reportLinksConvProgress) {
+					print "Inserting $tuplesAdded tuples into $links_temp...";
+				}
+				$dbw->query( implode("",$sqlWrite) );
+				$totalTuplesInserted += $tuplesAdded;
+				if ($reportLinksConvProgress)
+					print " done. Total $totalTuplesInserted tuples inserted.\n";
+				performanceLog( $totalTuplesInserted . " " . (getMicroTime() - $baseTime) . "\n" );
+			}
+		}
+		print "$totalTuplesInserted valid titles and $numBadLinks invalid titles were processed.\n\n";
+		performanceLog( "$totalTuplesInserted valid titles and $numBadLinks invalid titles were processed.\n" );
+		performanceLog( "Total execution time: " . (getMicroTime() - $startTime) . " seconds.\n" );
+		if ( $logPerformance ) { fclose ( $fh ); }
+	}
+	#--------------------------------------------------------------------
+
+	if ( $overwriteLinksTable ) {
+		$dbConn = Database::newFromParams( $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname );
+		if (!($dbConn->isOpen())) {
+			print "Opening connection to database failed.\n";
+			return;
+		}
+		# Check for existing links_backup, and delete it if it exists.
+		print "Dropping backup links table if it exists...";
+		$dbConn->query( "DROP TABLE IF EXISTS $links_backup", DB_MASTER);
+		print " done.\n";
+
+		# Swap in the new table, and move old links table to links_backup.
+		# Use $links (from tableNames()) rather than the literal name "links",
+		# so the rename targets the same table every other statement here uses.
+		print "Swapping tables '$links' to '$links_backup'; '$links_temp' to '$links'...";
+		$dbConn->query( "RENAME TABLE $links TO $links_backup, $links_temp TO $links", DB_MASTER );
+		print " done.\n\n";
+
+		$dbConn->close();
+		print "Conversion complete. The old table remains at $links_backup;\n";
+		print "delete at your leisure.\n";
+	} else {
+		print "Conversion complete. The converted table is at $links_temp;\n";
+		print "the original links table is unchanged.\n";
+	}
+}
+
+#--------------------------------------------------------------------
+
+/**
+ * Drop any existing links_temp table and create an empty one with integer
+ * l_from/l_to columns, using a connection opened with the admin credentials.
+ * The unique and secondary keys are omitted when the global $noKeys is set.
+ * Prints a message and returns without doing anything if the connection
+ * cannot be opened.
+ */
+function createTempTable() {
+	global $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname;
+	global $noKeys;
+	$dbConn = Database::newFromParams( $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname );
+
+	if (!($dbConn->isOpen())) {
+		print "Opening connection to database failed.\n";
+		return;
+	}
+	$links_temp = $dbConn->tableName( 'links_temp' );
+
+	print "Dropping temporary links table if it exists...";
+	$dbConn->query( "DROP TABLE IF EXISTS $links_temp");
+	print " done.\n";
+
+	print "Creating temporary links table...";
+	if ( $noKeys ) {
+		$dbConn->query( "CREATE TABLE $links_temp ( " .
+			"l_from int(8) unsigned NOT NULL default '0', " .
+			"l_to int(8) unsigned NOT NULL default '0')");
+	} else {
+		$dbConn->query( "CREATE TABLE $links_temp ( " .
+			"l_from int(8) unsigned NOT NULL default '0', " .
+			"l_to int(8) unsigned NOT NULL default '0', " .
+			"UNIQUE KEY l_from(l_from,l_to), " .
+			"KEY (l_to))");
+	}
+	print " done.\n\n";
+}
+
+/**
+ * Append $text to the performance log file handle $fh, but only when the
+ * global $logPerformance flag is set.
+ *
+ * @param string $text Text to write
+ */
+function performanceLog( $text ) {
+	global $logPerformance, $fh;
+	if ( $logPerformance ) {
+		fwrite( $fh, $text );
+	}
+}
+
+/**
+ * @return float Current time in seconds, with microsecond accuracy
+ */
+function getMicroTime() {
+	# microtime() returns "usec sec" as a string; add the two parts
+	list($usec, $sec) = explode(" ", microtime());
+	return ((float)$usec + (float)$sec);
+}
+
+
+
+?>
diff --git a/maintenance/convertLinks.php b/maintenance/convertLinks.php
new file mode 100644
index 00000000..5939b943
--- /dev/null
+++ b/maintenance/convertLinks.php
@@ -0,0 +1,16 @@
+<?php
+/**
+ * Convert from the old links schema (string->ID) to the new schema (ID->ID)
+ * The wiki should be put into read-only mode while this script executes
+ *
+ * @package MediaWiki
+ * @subpackage Maintenance
+ */
+
+/** */
+require_once( "commandLine.inc" );
+require_once( "convertLinks.inc" );
+
+convertLinks();
+
+?>
diff --git a/maintenance/counter.php b/maintenance/counter.php
new file mode 100644
index 00000000..d84c877d
--- /dev/null
+++ b/maintenance/counter.php
@@ -0,0 +1,5 @@
+<?php
+/**
+ * Overwrite a previously printed string in place: emit one backspace
+ * (chr(8)) per character of $last, then print $current.
+ *
+ * @param string $last    Text printed by the previous call
+ * @param string $current Text to print now
+ */
+function print_c($last, $current) {
+	echo str_repeat( chr(8), strlen( $last ) ) . $current;
+}
+?>
diff --git a/maintenance/createAndPromote.php b/maintenance/createAndPromote.php
new file mode 100644
index 00000000..df29c114
--- /dev/null
+++ b/maintenance/createAndPromote.php
@@ -0,0 +1,48 @@
+<?php
+
+/**
+ * Maintenance script to create an account and grant it administrator rights
+ *
+ * Usage: php createAndPromote.php <username> <password>
+ *
+ * @package MediaWiki
+ * @subpackage Maintenance
+ * @author Rob Church <robchur@gmail.com>
+ */
+
+require_once( 'commandLine.inc' );
+
+# Exactly two arguments are required. The previous test,
+# "!count( $args ) == 2", negated the count before comparing it, so it only
+# caught the no-argument case and let a lone username through.
+if( count( $args ) != 2 ) {
+	echo( "Please provide a username and password for the new account.\n" );
+	die( 1 );
+}
+
+$username = $args[0];
+$password = $args[1];
+
+global $wgDBname;
+echo( "{$wgDBname}: Creating and promoting User:{$username}..." );
+
+# Validate username and check it doesn't exist
+$user = User::newFromName( $username );
+if( !is_object( $user ) ) {
+	echo( "invalid username.\n" );
+	die( 1 );
+} elseif( 0 != $user->idForName() ) {
+	echo( "account exists.\n" );
+	die( 1 );
+}
+
+# Insert the account into the database
+$user->addToDatabase();
+$user->setPassword( $password );
+$user->setToken();
+
+# Promote user
+$user->addGroup( 'sysop' );
+
+# Increment site_stats.ss_users
+$ssu = new SiteStatsUpdate( 0, 0, 0, 0, 1 );
+$ssu->doUpdate();
+
+echo( "done.\n" );
+
+?>
\ No newline at end of file diff --git a/maintenance/database.sql b/maintenance/database.sql new file mode 100644 index 00000000..dea99542 --- /dev/null +++ b/maintenance/database.sql @@ -0,0 +1,7 @@ +-- SQL script to create database for wiki. This is run from +-- the installation script which replaces the variables with +-- their values from local settings. +-- + +DROP DATABASE IF EXISTS `{$wgDBname}`; +CREATE DATABASE `{$wgDBname}`; diff --git a/maintenance/delete-idle-wiki-users.pl b/maintenance/delete-idle-wiki-users.pl new file mode 100644 index 00000000..aef68ccd --- /dev/null +++ b/maintenance/delete-idle-wiki-users.pl @@ -0,0 +1,138 @@ +#!/usr/bin/perl +# +# Nuke idle wiki accounts from the wiki's user database. +# +# Copyright (C) 2005 Ralf Baechle (ralf@linux-mips.org) +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. +# +# THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED +# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN +# NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF +# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 675 Mass Ave, Cambridge, MA 02139, USA. 
+#
+
+my $database = "DBI:mysql:database=wikidb;host=localhost";
+my $dbuser = "wikiuser";
+my $dbpasswd = "password";
+
+use strict;
+use DBI();
+
+my $verbose = 0;
+my $for_real = 1;
+
+# Run one SQL statement on $dbh. The statement is echoed at verbosity 3 or
+# higher and only executed while $for_real is 1.
+sub do_db_op
+{
+	my ($dbh, $sql) = @_;
+
+	if ($verbose >= 3) {
+		print $sql . ";\n"
+	}
+
+	if ($for_real == 1) {
+		$dbh->do($sql);
+	}
+}
+
+# Walk every row of the user table and delete each account that has no rows
+# in cur or old under the same user id and user name.
+sub undo_user
+{
+	my ($dbh, $sth);
+	my $seen = 0;		# users examined
+	my $killed = 0;		# users deleted; starts at 0 so the summary is numeric
+
+	# Connect to the database.
+	$dbh = DBI->connect($database, $dbuser, $dbpasswd, {RaiseError => 1});
+
+	$sth = $dbh->prepare("SELECT * FROM user");
+	$sth->execute();
+
+	# Every row is fetched inside the loop. An extra fetch before the loop
+	# used to throw the first user away unexamined.
+	while (my $ref = $sth->fetchrow_hashref()) {
+		my ($user_id, $user_name, $cur_edits, $old_edits, $edits);
+
+		$seen++;
+		$user_name = $ref->{user_name};
+		$user_id = $ref->{user_id};
+		if ($verbose >= 2) {
+			print "Checking user " . $user_name .
+			      " has user_id " . $user_id . ".\n";
+		}
+
+		# Count edits with COUNT(*) instead of reading ->rows from a
+		# SELECT handle, which DBI does not guarantee to be meaningful.
+		($cur_edits) = $dbh->selectrow_array(
+			"SELECT COUNT(*) FROM cur WHERE " .
+			"cur_user = $user_id" .
+			" AND " .
+			"cur_user_text = " . $dbh->quote("$user_name"));
+
+		($old_edits) = $dbh->selectrow_array(
+			"SELECT COUNT(*) FROM old WHERE " .
+			"old_user = $user_id" .
+			" AND " .
+			"old_user_text = " . $dbh->quote("$user_name"));
+
+		$edits = $cur_edits + $old_edits;
+
+		if ($edits == 0) {
+			if ($verbose >= 2) {
+				print "Annihilating user " . $user_name .
+				      ", user_id " . $user_id . ".\n";
+			}
+
+			do_db_op($dbh,
+				 "DELETE FROM user WHERE user_name = " .
+				 $dbh->quote("$user_name") .
+				 " AND " .
+				 "user_id = $user_id");
+
+			$killed++;
+		} elsif ($verbose >= 2) {
+			print "Keeping user " . $user_name .
+			      ", user_id " . $user_id . ".\n";
+		}
+	}
+
+	$sth->finish();
+
+	$dbh->disconnect();
+
+	if ($seen == 0) {
+		print "There is no user in this wiki.\n";
+		return;
+	}
+
+	if ($verbose >= 1) {
+		print "Killed " . $killed . " users\n";
+	}
+}
+
+my (@users, $user, $this, $opts);
+
+@users = ();
+$opts = 1;
+
+# -v raises the verbosity by one, --verbose sets it to 1, -- ends option
+# parsing; anything else is collected in @users.
+foreach $this (@ARGV) {
+	if ($opts == 1 && $this eq '-v') {
+		$verbose++;
+	} elsif ($opts == 1 && $this eq '--verbose') {
+		$verbose = 1;
+	} elsif ($opts == 1 && $this eq '--') {
+		$opts = 0;
+	} else {
+		push(@users, $this);
+	}
+}
+
+undo_user();
+
diff --git a/maintenance/deleteBatch.php b/maintenance/deleteBatch.php
new file mode 100644
index 00000000..697dffd7
--- /dev/null
+++ b/maintenance/deleteBatch.php
@@ -0,0 +1,85 @@
+<?php
+
+# delete a batch of pages
+# Usage: php deleteBatch.php [-u <user>] [-r <reason>] [-i <interval>] <listfile>
+# where
+#	<listfile> is a file where each line is the title of a page to delete
+#	           (read from stdin when omitted)
+#	<user> is the username
+#	<reason> is the delete reason
+#	<interval> is the number of seconds to sleep for after each delete
+
+$oldCwd = getcwd();
+$optionsWithArgs = array( 'u', 'r', 'i' );
+require_once( 'commandLine.inc' );
+
+chdir( $oldCwd );
+
+# Options processing
+
+$filename = 'php://stdin';
+$user = 'Delete page script';
+$reason = '';
+$interval = 0;
+
+if ( isset( $args[0] ) ) {
+	$filename = $args[0];
+}
+if ( isset( $options['u'] ) ) {
+	$user = $options['u'];
+}
+if ( isset( $options['r'] ) ) {
+	$reason = $options['r'];
+}
+if ( isset( $options['i'] ) ) {
+	$interval = $options['i'];
+}
+
+$wgUser = User::newFromName( $user );
+
+
+# Setup complete, now start
+
+$file = fopen( $filename, 'r' );
+if ( !$file ) {
+	print "Unable to read file, exiting\n";
+	exit;
+}
+
+$dbw =& wfGetDB( DB_MASTER );
+
+for ( $linenum = 1; !feof( $file ); $linenum++ ) {
+	# Test the raw fgets() result for EOF before trimming. trim( false )
+	# is '', so the old "=== false" test after trim() could never match
+	# and EOF was reported as an invalid empty title.
+	$line = fgets( $file );
+	if ( $line === false ) {
+		break;
+	}
+	$line = trim( $line );
+	$page = Title::newFromText( $line );
+	if ( is_null( $page ) ) {
+		print "Invalid title '$line' on line $linenum\n";
+		continue;
+	}
+	if( !$page->exists() ) {
+		print "Skipping nonexistent page '$line'\n";
+		continue;
+	}
+
+
+	print
$page->getPrefixedText();
+	$dbw->begin();
+	if( $page->getNamespace() == NS_IMAGE ) {
+		$art = new ImagePage( $page );
+	} else {
+		$art = new Article( $page );
+	}
+	$art->doDelete( $reason );
+	$dbw->immediateCommit();
+	print "\n";
+
+	if ( $interval ) {
+		sleep( $interval );
+	}
+	wfWaitForSlaves( 5 );
+}
+
+
+?>
diff --git a/maintenance/deleteImageMemcached.php b/maintenance/deleteImageMemcached.php
new file mode 100644
index 00000000..4e17d21e
--- /dev/null
+++ b/maintenance/deleteImageMemcached.php
@@ -0,0 +1,60 @@
+<?php
+// php deleteImageMemcached.php --until "2005-09-05 00:00:00" --sleep 0 --report 10
+$optionsWithArgs = array( 'until', 'sleep', 'report' );
+
+require_once 'commandLine.inc';
+
+/**
+ * Deletes the "$wgDBname:Image:<md5 of name>" memcached key of every image
+ * whose img_timestamp is lower than a given timestamp.
+ */
+class DeleteImageCache {
+	var $until, $sleep, $report;
+
+	/**
+	 * @param string $until  Timestamp as a string of digits, compared against img_timestamp
+	 * @param int    $sleep  Microseconds passed to usleep() after each image (0 = no pause)
+	 * @param int    $report Print a progress line every $report images (0 = never)
+	 */
+	function DeleteImageCache( $until, $sleep, $report ) {
+		$this->until = $until;
+		$this->sleep = $sleep;
+		$this->report = $report;
+	}
+
+	/**
+	 * Select the matching images and delete their cache keys one by one.
+	 */
+	function main() {
+		global $wgMemc, $wgDBname;
+		$fname = 'DeleteImageCache::main';
+
+		ini_set( 'display_errors', false );
+
+		$dbr =& wfGetDB( DB_SLAVE );
+
+		$res = $dbr->select( 'image',
+			array( 'img_name' ),
+			array( "img_timestamp < {$this->until}" ),
+			$fname
+		);
+
+		$i = 0;
+		$total = $this->getImageCount();
+
+		while ( $row = $dbr->fetchObject( $res ) ) {
+			# A report interval of 0 disables progress output; it must
+			# never reach the modulo below as a divisor.
+			if ( $this->report > 0 && $i % $this->report == 0 )
+				printf("%s: %13s done (%s)\n", $wgDBname, "$i/$total", wfPercent( $i / $total * 100 ));
+			$md5 = md5( $row->img_name );
+			$wgMemc->delete( "$wgDBname:Image:$md5" );
+
+			if ($this->sleep != 0)
+				usleep( $this->sleep );
+
+			++$i;
+		}
+	}
+
+	/**
+	 * @return int Number of rows in the image table, used as the progress total
+	 */
+	function getImageCount() {
+		$fname = 'DeleteImageCache::getImageCount';
+
+		$dbr =& wfGetDB( DB_SLAVE );
+		return $dbr->selectField( 'image', 'COUNT(*)', array(), $fname );
+	}
+}
+
+# --until is mandatory: without any digits the WHERE clause built in main()
+# would be invalid SQL.
+$until = isset( $options['until'] ) ? preg_replace( "/[^\d]/", '', $options['until'] ) : '';
+if ( $until === '' ) {
+	echo "Usage: php deleteImageMemcached.php --until <timestamp> [--sleep <ms>] [--report <n>]\n";
+	exit( 1 );
+}
+# --sleep is given in milliseconds; usleep() takes microseconds
+$sleep = isset( $options['sleep'] ) ? (int)$options['sleep'] * 1000 : 0;
+$report = isset( $options['report'] ) ? (int)$options['report'] : 0;
+
+$dic = new DeleteImageCache( $until, $sleep, $report );
+$dic->main();
+?>
diff --git 
a/maintenance/deleteOldRevisions.inc b/maintenance/deleteOldRevisions.inc
new file mode 100644
index 00000000..dd48028a
--- /dev/null
+++ b/maintenance/deleteOldRevisions.inc
@@ -0,0 +1,60 @@
+<?php
+
+/**
+ * Support functions for the deleteOldRevisions script
+ *
+ * @package MediaWiki
+ * @subpackage Maintenance
+ * @author Rob Church <robchur@gmail.com>
+ */
+
+require_once( 'purgeOldText.inc' );
+
+/**
+ * Find every revision that is not the page_latest of some page, report how
+ * many there are and, when $delete is true, delete them and then call
+ * PurgeRedundantText( true ).
+ *
+ * @param bool $delete Actually delete; when false only the count is reported
+ */
+function DeleteOldRevisions( $delete = false ) {
+
+	# Data should come off the master, wrapped in a transaction
+	$dbw =& wfGetDB( DB_MASTER );
+	$dbw->begin();
+
+	$tbl_pag = $dbw->tableName( 'page' );
+	$tbl_rev = $dbw->tableName( 'revision' );
+
+	# Get "active" revisions from the page table
+	echo( "Searching for active revisions..." );
+	$cur = array();
+	$res = $dbw->query( "SELECT page_latest FROM $tbl_pag" );
+	while( $row = $dbw->fetchObject( $res ) ) {
+		$cur[] = $row->page_latest;
+	}
+	echo( "done.\n" );
+
+	# Get all revisions that aren't in this set
+	echo( "Searching for inactive revisions..." );
+	$old = array();
+	if( count( $cur ) ) {
+		$set = implode( ', ', $cur );
+		$where = "WHERE rev_id NOT IN ( $set )";
+	} else {
+		# No pages at all: "NOT IN ( )" would be a syntax error, and
+		# every revision is inactive anyway
+		$where = '';
+	}
+	$res = $dbw->query( "SELECT rev_id FROM $tbl_rev $where" );
+	while( $row = $dbw->fetchObject( $res ) ) {
+		$old[] = $row->rev_id;
+	}
+	echo( "done.\n" );
+
+	# Inform the user of what we're going to do
+	$count = count( $old );
+	echo( "$count old revisions found.\n" );
+
+	# Delete as appropriate
+	if( $delete && $count ) {
+		echo( "Deleting..." );
+		$set = implode( ', ', $old );
+		$dbw->query( "DELETE FROM $tbl_rev WHERE rev_id IN ( $set )" );
+		echo( "done.\n" );
+	}
+
+	# This bit's done
+	# Purge redundant text records
+	$dbw->commit();
+	if( $delete ) {
+		PurgeRedundantText( true );
+	}
+
+}
+
+?>
\ No newline at end of file
diff --git a/maintenance/deleteOldRevisions.php b/maintenance/deleteOldRevisions.php
new file mode 100644
index 00000000..9695a8c5
--- /dev/null
+++ b/maintenance/deleteOldRevisions.php
@@ -0,0 +1,30 @@
+<?php
+
+/**
+ * Delete old (non-current) revisions from the database
+ *
+ * @package MediaWiki
+ * @subpackage Maintenance
+ * @author Rob Church <robchur@gmail.com>
+ */
+
+$options = array( 'delete', 'help' );
+require_once( 'commandLine.inc' );
+require_once( 'deleteOldRevisions.inc' );
+
+echo( "Delete Old Revisions\n\n" );
+
+# --help wins over --delete; without --delete the run only reports
+$wantHelp = !empty( $options['help'] );
+$doDelete = !empty( $options['delete'] );
+
+if( $wantHelp ) {
+	ShowUsage();
+} else {
+	DeleteOldRevisions( $doDelete );
+}
+
+/**
+ * Print the command line usage of this script
+ */
+function ShowUsage() {
+	echo( "Deletes non-current revisions from the database.\n\n" .
+		"Usage: php deleteOldRevisions.php [--delete|--help]\n\n" .
+		"delete : Performs the deletion\n" .
+		" help : Show this usage information\n" );
+}
+
+?>
\ No newline at end of file
diff --git a/maintenance/deleteOrphanedRevisions.inc.php b/maintenance/deleteOrphanedRevisions.inc.php
new file mode 100644
index 00000000..7cfb1c6b
--- /dev/null
+++ b/maintenance/deleteOrphanedRevisions.inc.php
@@ -0,0 +1,33 @@
+<?php
+
+/**
+ * Support functions for the deleteOrphanedRevisions maintenance script
+ *
+ * @package MediaWiki
+ * @subpackage Maintenance
+ * @author Rob Church <robchur@gmail.com>
+ */
+
+/**
+ * Delete one or more revisions from the database
+ * Do this inside a transaction
+ *
+ * @param $id Revision id, or array of revision id values
+ * @param $dbw Database class (needs to be a master)
+ */
+function deleteRevisions( $id, &$dbw ) {
+	# Accept a single id as well as a list
+	if( !is_array( $id ) )
+		$id = array( $id );
+	$dbw->delete( 'revision', array( 'rev_id' => $id ), 'deleteRevision' );
+}
+
+/**
+ * Spit out script usage information and exit
+ */
+function showUsage() {
+	echo( "Finds revisions which refer to nonexisting pages and deletes them from the database\n" );
+	echo( "USAGE: php deleteOrphanedRevisions.php [--report]\n\n" );
+	echo( " --report : Prints out a count of affected revisions but doesn't delete them\n\n" );
+	# Really exit, as documented: the calling script runs its deletion
+	# straight after this call when --help is given.
+	exit();
+}
+
+?>
\ No newline at end of file
diff --git a/maintenance/deleteOrphanedRevisions.php b/maintenance/deleteOrphanedRevisions.php
new file mode 100644
index 00000000..b4f5b517
--- /dev/null
+++ b/maintenance/deleteOrphanedRevisions.php
@@ -0,0 +1,55 @@
+<?php
+
+/**
+ * Maintenance script to delete revisions which refer to a nonexisting page
+ * Sometimes manual deletion done in a rush leaves crap in the database
+ *
+ * @package MediaWiki
+ * @subpackage Maintenance
+ * @author Rob Church <robchur@gmail.com>
+ * @todo More efficient cleanup of text records
+ */
+
+$options = array( 'report', 'help' );
+require_once( 'commandLine.inc' );
+require_once( 'deleteOrphanedRevisions.inc.php' );
+echo( "Delete Orphaned Revisions\n" );
+
+# Asking for help must never reach the deletion below, so stop here
+# explicitly instead of relying on showUsage() to end the script.
+if( isset( $options['help'] ) ) {
+	showUsage();
+	exit();
+}
+
+$report = isset( $options['report'] );
+
+$dbw =& wfGetDB( DB_MASTER );
+$dbw->immediateBegin();
+# Defines $page and $revision as table names
+extract( $dbw->tableNames( 'page', 'revision' ) );
+
+# Find all the orphaned revisions
+echo( "Checking for orphaned revisions..." );
+$sql = "SELECT rev_id FROM {$revision} LEFT JOIN {$page} ON rev_page = page_id WHERE page_namespace IS NULL";
+$res = $dbw->query( $sql, 'deleteOrphanedRevisions' );
+
+# Stash 'em all up for deletion (if needed)
+# Start from an empty list so that count() below is well defined when the
+# query returns no rows
+$revisions = array();
+while( $row = $dbw->fetchObject( $res ) )
+	$revisions[] = $row->rev_id;
+$dbw->freeResult( $res );
+$count = count( $revisions );
+echo( "found {$count}.\n" );
+
+# Nothing to do?
+if( $report || $count == 0 ) {
+	$dbw->immediateCommit();
+	exit();
+}
+
+# Delete each revision
+echo( "Deleting..." );
+deleteRevisions( $revisions, $dbw );
+echo( "done.\n" );
+
+# Close the transaction and call the script to purge unused text records
+$dbw->immediateCommit();
+require_once( 'purgeOldText.inc' );
+PurgeRedundantText( true );
+
+?>
\ No newline at end of file
diff --git a/maintenance/deleteRevision.php b/maintenance/deleteRevision.php
new file mode 100644
index 00000000..e7d005b6
--- /dev/null
+++ b/maintenance/deleteRevision.php
@@ -0,0 +1,40 @@
+<?php
+# Move the given revisions into the archive table and delete them from the
+# revision table.
+# Usage: php deleteRevision.php <revid> [<revid> ...]
+require_once( 'commandLine.inc' );
+
+$dbw =& wfGetDB( DB_MASTER );
+
+if ( count( $args ) == 0 ) {
+	echo "Usage: php deleteRevision.php <revid> [<revid> ...]\n";
+	exit(1);
+}
+
+echo "Deleting revision(s) " . implode( ',', $args ) . " from $wgDBname...\n";
+
+# Caller name handed to the database layer; it was used below without ever
+# being defined
+$fname = 'deleteRevision';
+
+$affected = 0;
+foreach ( $args as $revID ) {
+	# Copy the revision, joined to its page, into the archive table
+	$dbw->insertSelect( 'archive', array( 'page', 'revision' ),
+		array(
+			'ar_namespace' => 'page_namespace',
+			'ar_title' => 'page_title',
+			'ar_comment' => 'rev_comment',
+			'ar_user' => 'rev_user',
+			'ar_user_text' => 'rev_user_text',
+			'ar_timestamp' => 'rev_timestamp',
+			'ar_minor_edit' => 'rev_minor_edit',
+			'ar_rev_id' => 'rev_id',
+			'ar_text_id' => 'rev_text_id',
+		), array(
+			'rev_id' => $revID,
+			'page_id = rev_page'
+		), $fname
+	);
+	if ( !$dbw->affectedRows() ) {
+		echo "Revision $revID not found\n";
+	} else {
+		# Only remove the live row once the archive copy was made
+		$affected += $dbw->affectedRows();
+		$dbw->delete( 'revision', array( 'rev_id' => $revID ), $fname );
+	}
+}
+
+print "Deleted $affected revisions\n";
+?>
diff --git a/maintenance/diffLanguage.php b/maintenance/diffLanguage.php
new file mode 100644
index 00000000..eb87b3ba
--- /dev/null
+++ b/maintenance/diffLanguage.php
@@ -0,0 +1,159 @@
+<?php
+# MediaWiki web-based config/installation
+# Copyright (C) 2004 Ashar Voultoiz <thoane@altern.org> and others
+# http://www.mediawiki.org/
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+# http://www.gnu.org/copyleft/gpl.html
+
+/**
+ * Usage: php DiffLanguage.php [lang [file]]
+ *
+ * lang: Enter the language code following "Language" of the LanguageXX.php you
+ * want to check. The code is normalised to "Xx" form (first letter upper
+ * case, the rest lower case) before the file is looked up.
+ *
+ * file: A php language file you want to include to compare mediawiki
+ * Language{Lang}.php against (for example Special:Allmessages PHP output).
+ *
+ * The goal is to get a list of messages not yet localised in a LanguageXX.php
+ * file, using the English messages (or the given file) as reference.
+ *
+ * The script then prints a list of wgAllMessagesXX keys that aren't localised,
+ * the percentage of messages correctly localised and the number of messages to
+ * be translated.
+ *
+ * @package MediaWiki
+ * @subpackage Maintenance
+ */
+
+/** This script runs from the command line */
+require_once( 'parserTests.inc' );
+require_once( 'commandLine.inc' );
+
+if( isset($options['help']) ) { usage(); wfDie(); }
+
+$wgLanguageCode = ucfirstlcrest($wgLanguageCode);
+/** Language messages we will use as reference. By default 'en' */
+$referenceMessages = $wgAllMessagesEn;
+$referenceLanguage = 'En';
+$referenceFilename = 'Language'.$referenceLanguage.'.php';
+/** Language messages we will test. */
+$testMessages = array();
+$testLanguage = '';
+/** Whether we use an external language file as reference */
+$externalRef = false;
+
+# FUNCTIONS
+/**
+ * Print a one-line usage summary
+ * @todo more information
+ */
+function usage() {
+echo 'php DiffLanguage.php [lang [file]] [--color=(yes|no|light)]'."\n";
+}
+
+/** Return a given string with first letter upper case, the rest lowercase */
+function ucfirstlcrest($string) {
+	return strtoupper(substr($string,0,1)).strtolower(substr($string,1));
+}
+
+/**
+ * Return a $wgAllMessages array shipped in MediaWiki
+ * @param string $languageCode Formatted language code
+ * @return array The MediaWiki default $wgAllMessages array requested
+ */
+function getMediawikiMessages($languageCode = 'En') {
+
+	$foo = "wgAllMessages$languageCode";
+	global $$foo, $wgSkinNamesEn;
+
+	// it might already be loaded in LocalSettings.php
+	if(!isset($$foo)) {
+		global $IP;
+		$langFile = $IP.'/languages/Language'.$languageCode.'.php';
+		if (file_exists( $langFile ) ) {
+			print "Including $langFile\n";
+			global $wgNamespaceNamesEn;
+			include($langFile);
+		} else wfDie("ERROR: The file $langFile does not exist !\n");
+	}
+	return $$foo;
+}
+
+/**
+ * Return a $wgAllMessages array from a given file. The language of the array
+ * needs to be given because we can not detect which language it provides
+ * @param string $filename Filename of the file containing a message array
+ * @param string $languageCode Language of the external array
+ * @return array A $wgAllMessages array from an external file.
+ */
+function getExternalMessages($filename, $languageCode) {
+	print "Including external file $filename.\n";
+	include($filename);
+	$foo = "wgAllMessages$languageCode";
+	return $$foo;
+}
+
+# MAIN ENTRY
+if ( isset($args[0]) ) {
+	$lang = ucfirstlcrest($args[0]);
+
+	// optionally compare against another language file, used as reference
+	// instead of the default English language.
+	if( isset($args[1])) {
+		// we assume the external file contains an array of messages for the
+		// lang we are testing
+		$referenceMessages = getExternalMessages( $args[1], $lang );
+		$referenceLanguage = $lang;
+		$referenceFilename = $args[1];
+		$externalRef = true;
+	}
+
+	// Load data from MediaWiki
+	$testMessages = getMediawikiMessages($lang);
+	$testLanguage = $lang;
+} else {
+	usage();
+	wfDie();
+}
+
+/** parsertest is used to do differences */
+$myParserTest =& new ParserTest();
+
+# Get all reference messages and check if they exist in the tested language
+$i = 0;
+
+$msg = "MW Language{$testLanguage}.php against ";
+if($externalRef) { $msg .= 'external file '; }
+else { $msg .= 'internal file '; }
+$msg .= $referenceFilename.' ('.$referenceLanguage."):\n----\n";
+echo $msg;
+
+// process messages
+foreach($referenceMessages as $index => $ref)
+{
+	// message is not localized
+	if(!(isset($testMessages[$index]))) {
+		$i++;
+		print "'$index' => \"$ref\",\n";
+	// Messages in the same language differ
+	} elseif( ($lang == $referenceLanguage) AND ($testMessages[$index] != $ref)) {
+		print "\n$index differs:\n";
+		print $myParserTest->quickDiff($testMessages[$index],$ref,'tested','reference');
+	}
+}
+
+// Summary: report on the language that was tested, and measure it against the
+// reference array that was actually iterated above (not always the English one)
+$total = count($referenceMessages);
+$percent = $total ? (100 - $i/$total * 100) : 100;
+echo "\n----\n".$msg;
+echo "$testLanguage language is complete at ".number_format($percent,2)."%\n";
+echo "$i unlocalised messages of the ".$total." messages available.\n";
+?>
diff --git a/maintenance/dtrace/counts.d b/maintenance/dtrace/counts.d
new file mode 100644
index 00000000..13725d99
--- /dev/null
+++ b/maintenance/dtrace/counts.d
@@ -0,0 +1,23 @@
+/*
+ * This software is in the public domain.
+ *
+ * $Id: counts.d 10510 2005-08-15 01:46:19Z kateturner $
+ */
+
+/* Count the calls made to each PHP function in the traced process. */
+
+#pragma D option quiet
+
+self int tottime;
+BEGIN {
+	tottime = timestamp;
+}
+
+/* The clause body needs its opening brace; it was missing. */
+php$target:::function-entry
+{
+	@counts[copyinstr(arg0)] = count();
+}
+
+END {
+	printf("Total time: %dus\n", (timestamp - tottime) / 1000);
+	printf("# calls by function:\n");
+	printa("%-40s %@d\n", @counts);
+}
+
diff --git a/maintenance/dtrace/tree.d b/maintenance/dtrace/tree.d
new file mode 100644
index 00000000..2f16e41d
--- /dev/null
+++ b/maintenance/dtrace/tree.d
@@ -0,0 +1,26 @@
+/*
+ * This software is in the public domain.
+ *
+ * $Id: tree.d 10510 2005-08-15 01:46:19Z kateturner $
+ */
+
+/* Print an indented call tree; each return line shows the elapsed time. */
+
+#pragma D option quiet
+
+self int indent;
+self int times[int];
+
+php$target:::function-entry
+{
+	@counts[copyinstr(arg0)] = count();
+	printf("%*s", self->indent, "");
+	printf("-> %s\n", copyinstr(arg0));
+	/* remember the entry time, keyed by the current nesting depth */
+	self->times[self->indent] = timestamp;
+	self->indent += 2;
+}
+
+php$target:::function-return
+{
+	self->indent -= 2;
+	printf("%*s", self->indent, "");
+	printf("<- %s %dus\n", copyinstr(arg0), (timestamp - self->times[self->indent]) / 1000);
+}
diff --git a/maintenance/dumpBackup.php b/maintenance/dumpBackup.php
new file mode 100644
index 00000000..1735422d
--- /dev/null
+++ b/maintenance/dumpBackup.php
@@ -0,0 +1,99 @@
+<?php
+/**
+ * Copyright (C) 2005 Brion Vibber <brion@pobox.com>
+ * http://www.mediawiki.org/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @package MediaWiki + * @subpackage SpecialPage + */ + +$originalDir = getcwd(); + +$optionsWithArgs = array( 'pagelist', 'start', 'end' ); + +require_once( 'commandLine.inc' ); +require_once( 'SpecialExport.php' ); +require_once( 'maintenance/backup.inc' ); + +$dumper = new BackupDumper( $argv ); + +if( isset( $options['quiet'] ) ) { + $dumper->reporting = false; +} + +if ( isset( $options['pagelist'] ) ) { + $olddir = getcwd(); + chdir( $originalDir ); + $pages = file( $options['pagelist'] ); + chdir( $olddir ); + if ( $pages === false ) { + wfDie( "Unable to open file {$options['pagelist']}\n" ); + } + $pages = array_map( 'trim', $pages ); + $dumper->pages = array_filter( $pages, create_function( '$x', 'return $x !== "";' ) ); +} + +if( isset( $options['start'] ) ) { + $dumper->startId = intval( $options['start'] ); +} +if( isset( $options['end'] ) ) { + $dumper->endId = intval( $options['end'] ); +} +$dumper->skipHeader = isset( $options['skip-header'] ); +$dumper->skipFooter = isset( $options['skip-footer'] ); + +$textMode = isset( $options['stub'] ) ? MW_EXPORT_STUB : MW_EXPORT_TEXT; + +if( isset( $options['full'] ) ) { + $dumper->dump( MW_EXPORT_FULL, $textMode ); +} elseif( isset( $options['current'] ) ) { + $dumper->dump( MW_EXPORT_CURRENT, $textMode ); +} else { + $dumper->progress( <<<END +This script dumps the wiki page database into an XML interchange wrapper +format for export or backup. + +XML output is sent to stdout; progress reports are sent to stderr. + +Usage: php dumpBackup.php <action> [<options>] +Actions: + --full Dump complete history of every page. + --current Includes only the latest revision of each page. + +Options: + --quiet Don't dump status reports to stderr. 
+ --report=n Report position and speed after every n pages processed. + (Default: 100) + --server=h Force reading from MySQL server h + --start=n Start from page_id n + --end=n Stop before page_id n (exclusive) + --skip-header Don't output the <mediawiki> header + --skip-footer Don't output the </mediawiki> footer + --stub Don't perform old_text lookups; for 2-pass dump + +Fancy stuff: + --plugin=<class>[:<file>] Load a dump plugin class + --output=<type>:<file> Begin a filtered output stream; + <type>s: file, gzip, bzip2, 7zip + --filter=<type>[:<options>] Add a filter on an output branch + +END +); +} + +?> diff --git a/maintenance/dumpHTML.inc b/maintenance/dumpHTML.inc new file mode 100644 index 00000000..2ed1e4a2 --- /dev/null +++ b/maintenance/dumpHTML.inc @@ -0,0 +1,650 @@ +<?php +/** + * @package MediaWiki + * @subpackage Maintenance + */ + +define( 'REPORTING_INTERVAL', 10 ); + +require_once( 'includes/ImagePage.php' ); +require_once( 'includes/CategoryPage.php' ); +require_once( 'includes/RawPage.php' ); + +class DumpHTML { + # Destination directory + var $dest; + + # Show interlanguage links? + var $interwiki = true; + + # Depth of HTML directory tree + var $depth = 3; + + # Directory that commons images are copied into + var $sharedStaticPath; + + # Relative path to image directory + var $imageRel = 'upload'; + + # Copy commons images instead of symlinking + var $forceCopy = false; + + # Make links assuming the script path is in the same directory as + # the destination + var $alternateScriptPath = false; + + # Original values of various globals + var $oldArticlePath = false, $oldCopyrightIcon = false; + + # Has setupGlobals been called? 
+ var $setupDone = false; + + # List of raw pages used in the current article + var $rawPages; + + # Skin to use + var $skin = 'dumphtml'; + + function DumpHTML( $settings ) { + foreach ( $settings as $var => $value ) { + $this->$var = $value; + } + } + + /** + * Write a set of articles specified by start and end page_id + * Skip categories and images, they will be done separately + */ + function doArticles( $start, $end = false ) { + $fname = 'DumpHTML::doArticles'; + + $this->setupGlobals(); + + if ( $end === false ) { + $dbr =& wfGetDB( DB_SLAVE ); + $end = $dbr->selectField( 'page', 'max(page_id)', false, $fname ); + } + + $mainPageObj = Title::newMainPage(); + $mainPage = $mainPageObj->getPrefixedDBkey(); + + + for ($id = $start; $id <= $end; $id++) { + wfWaitForSlaves( 20 ); + if ( !($id % REPORTING_INTERVAL) ) { + print "Processing ID: $id\r"; + } + if ( !($id % (REPORTING_INTERVAL*10) ) ) { + print "\n"; + } + $title = Title::newFromID( $id ); + if ( $title ) { + $ns = $title->getNamespace() ; + if ( $ns != NS_CATEGORY && $title->getPrefixedDBkey() != $mainPage ) { + $this->doArticle( $title ); + } + } + } + print "\n"; + } + + function doSpecials() { + $this->doMainPage(); + + $this->setupGlobals(); + print "Special:Categories..."; + $this->doArticle( Title::makeTitle( NS_SPECIAL, 'Categories' ) ); + print "\n"; + } + + /** Write the main page as index.html */ + function doMainPage() { + + print "Making index.html "; + + // Set up globals with no ../../.. 
in the link URLs + $this->setupGlobals( 0 ); + + $title = Title::newMainPage(); + $text = $this->getArticleHTML( $title ); + $file = fopen( "{$this->dest}/index.html", "w" ); + if ( !$file ) { + print "\nCan't open index.html for writing\n"; + return false; + } + fwrite( $file, $text ); + fclose( $file ); + print "\n"; + } + + function doImageDescriptions() { + global $wgSharedUploadDirectory; + + $fname = 'DumpHTML::doImageDescriptions'; + + $this->setupGlobals(); + + /** + * Dump image description pages that don't have an associated article, but do + * have a local image + */ + $dbr =& wfGetDB( DB_SLAVE ); + extract( $dbr->tableNames( 'image', 'page' ) ); + $res = $dbr->select( 'image', array( 'img_name' ), false, $fname ); + + $i = 0; + print "Writing image description pages for local images\n"; + $num = $dbr->numRows( $res ); + while ( $row = $dbr->fetchObject( $res ) ) { + wfWaitForSlaves( 10 ); + if ( !( ++$i % REPORTING_INTERVAL ) ) { + print "Done $i of $num\r"; + } + $title = Title::makeTitle( NS_IMAGE, $row->img_name ); + if ( $title->getArticleID() ) { + // Already done by dumpHTML + continue; + } + $this->doArticle( $title ); + } + print "\n"; + + /** + * Dump images which only have a real description page on commons + */ + print "Writing description pages for commons images\n"; + $i = 0; + for ( $hash = 0; $hash < 256; $hash++ ) { + $dir = sprintf( "%01x/%02x", intval( $hash / 16 ), $hash ); + $paths = array_merge( glob( "{$this->sharedStaticPath}/$dir/*" ), + glob( "{$this->sharedStaticPath}/thumb/$dir/*" ) ); + + foreach ( $paths as $path ) { + $file = basename( $path ); + if ( !(++$i % REPORTING_INTERVAL ) ) { + print "$i\r"; + } + + $title = Title::makeTitle( NS_IMAGE, $file ); + $this->doArticle( $title ); + } + } + print "\n"; + } + + function doCategories() { + $fname = 'DumpHTML::doCategories'; + $this->setupGlobals(); + + $dbr =& wfGetDB( DB_SLAVE ); + print "Selecting categories..."; + $sql = 'SELECT DISTINCT cl_to FROM ' . 
$dbr->tableName( 'categorylinks' ); + $res = $dbr->query( $sql, $fname ); + + print "\nWriting " . $dbr->numRows( $res ). " category pages\n"; + $i = 0; + while ( $row = $dbr->fetchObject( $res ) ) { + wfWaitForSlaves( 10 ); + if ( !(++$i % REPORTING_INTERVAL ) ) { + print "$i\r"; + } + $title = Title::makeTitle( NS_CATEGORY, $row->cl_to ); + $this->doArticle( $title ); + } + print "\n"; + } + + function doRedirects() { + print "Doing redirects...\n"; + $fname = 'DumpHTML::doRedirects'; + $this->setupGlobals(); + $dbr =& wfGetDB( DB_SLAVE ); + + $res = $dbr->select( 'page', array( 'page_namespace', 'page_title' ), + array( 'page_is_redirect' => 1 ), $fname ); + $num = $dbr->numRows( $res ); + print "$num redirects to do...\n"; + $i = 0; + while ( $row = $dbr->fetchObject( $res ) ) { + $title = Title::makeTitle( $row->page_namespace, $row->page_title ); + if ( !(++$i % (REPORTING_INTERVAL*10) ) ) { + print "Done $i of $num\n"; + } + $this->doArticle( $title ); + } + } + + /** Write an article specified by title */ + function doArticle( $title ) { + global $wgTitle, $wgSharedUploadPath, $wgSharedUploadDirectory; + global $wgUploadDirectory; + + $this->rawPages = array(); + $text = $this->getArticleHTML( $title ); + + if ( $text === false ) { + return; + } + + # Parse the XHTML to find the images + $images = $this->findImages( $text ); + $this->copyImages( $images ); + + # Write to file + $this->writeArticle( $title, $text ); + + # Do raw pages + wfMkdirParents( "{$this->dest}/raw", 0755 ); + foreach( $this->rawPages as $record ) { + list( $file, $title, $params ) = $record; + + $path = "{$this->dest}/raw/$file"; + if ( !file_exists( $path ) ) { + $article = new Article( $title ); + $request = new FauxRequest( $params ); + $rp = new RawPage( $article, $request ); + $text = $rp->getRawText(); + + print "Writing $file\n"; + $file = fopen( $path, 'w' ); + if ( !$file ) { + print("Can't open file $path for writing\n"); # $file now holds fopen()'s result, so report $path + continue; + } + fwrite( $file, $text ); + 
fclose( $file ); + } + } + } + + /** Write the given text to the file identified by the given title object */ + function writeArticle( &$title, $text ) { + $filename = $this->getHashedFilename( $title ); + $fullName = "{$this->dest}/$filename"; + $fullDir = dirname( $fullName ); + + wfMkdirParents( $fullDir, 0755 ); + + $file = fopen( $fullName, 'w' ); + if ( !$file ) { + print("Can't open file $fullName for writing\n"); + return; + } + + fwrite( $file, $text ); + fclose( $file ); + } + + /** Set up globals required for parsing */ + function setupGlobals( $currentDepth = NULL ) { + global $wgUser, $wgTitle, $wgStylePath, $wgArticlePath, $wgMathPath; + global $wgUploadPath, $wgLogo, $wgMaxCredits, $wgSharedUploadPath; + global $wgHideInterlanguageLinks, $wgUploadDirectory, $wgThumbnailScriptPath; + global $wgSharedThumbnailScriptPath, $wgEnableParserCache, $wgHooks, $wgServer; + global $wgRightsUrl, $wgRightsText, $wgCopyrightIcon; + + static $oldLogo = NULL; + + if ( !$this->setupDone ) { + $wgHooks['GetLocalURL'][] =& $this; + $wgHooks['GetFullURL'][] =& $this; + $this->oldArticlePath = $wgServer . $wgArticlePath; + } + + if ( is_null( $currentDepth ) ) { + $currentDepth = $this->depth; + } + + if ( $this->alternateScriptPath ) { + if ( $currentDepth == 0 ) { + $wgScriptPath = '.'; + } else { + $wgScriptPath = '..' . str_repeat( '/..', $currentDepth - 1 ); + } + } else { + $wgScriptPath = '..' . str_repeat( '/..', $currentDepth ); + } + + $wgArticlePath = str_repeat( '../', $currentDepth ) . '$1'; + + # Logo image + # Allow for repeated setup + if ( !is_null( $oldLogo ) ) { + $wgLogo = $oldLogo; + } else { + $oldLogo = $wgLogo; + } + + if ( strpos( $wgLogo, $wgUploadPath ) === 0 ) { + # If it's in the upload directory, rewrite it to the new upload directory + $wgLogo = "$wgScriptPath/{$this->imageRel}/" . 
substr( $wgLogo, strlen( $wgUploadPath ) + 1 ); + } elseif ( $wgLogo{0} == '/' ) { + # This is basically heuristic + # Rewrite an absolute logo path to one relative to the the script path + $wgLogo = $wgScriptPath . $wgLogo; + } + + # Another ugly hack + if ( !$this->setupDone ) { + $this->oldCopyrightIcon = $wgCopyrightIcon; + } + $wgCopyrightIcon = str_replace( 'src="/images', + 'src="' . htmlspecialchars( $wgScriptPath ) . '/images', $this->oldCopyrightIcon ); + + + + $wgStylePath = "$wgScriptPath/skins"; + $wgUploadPath = "$wgScriptPath/{$this->imageRel}"; + $wgSharedUploadPath = "$wgUploadPath/shared"; + $wgMaxCredits = -1; + $wgHideInterlanguageLinks = !$this->interwiki; + $wgThumbnailScriptPath = $wgSharedThumbnailScriptPath = false; + $wgEnableParserCache = false; + $wgMathPath = "$wgScriptPath/math"; + + if ( !empty( $wgRightsText ) ) { + $wgRightsUrl = "$wgScriptPath/COPYING.html"; + } + + $wgUser = new User; + $wgUser->setOption( 'skin', $this->skin ); + $wgUser->setOption( 'editsection', 0 ); + + $this->sharedStaticPath = "$wgUploadDirectory/shared"; + + $this->setupDone = true; + } + + /** Reads the content of a title object, executes the skin and captures the result */ + function getArticleHTML( &$title ) { + global $wgOut, $wgTitle, $wgArticle, $wgUser; + + $linkCache =& LinkCache::singleton(); + $linkCache->clear(); + $wgTitle = $title; + if ( is_null( $wgTitle ) ) { + return false; + } + + $ns = $wgTitle->getNamespace(); + if ( $ns == NS_SPECIAL ) { + $wgOut = new OutputPage; + $wgOut->setParserOptions( new ParserOptions ); + SpecialPage::executePath( $wgTitle ); + } else { + /** @todo merge with Wiki.php code */ + if ( $ns == NS_IMAGE ) { + $wgArticle = new ImagePage( $wgTitle ); + } elseif ( $ns == NS_CATEGORY ) { + $wgArticle = new CategoryPage( $wgTitle ); + } else { + $wgArticle = new Article( $wgTitle ); + } + $rt = Title::newFromRedirect( $wgArticle->fetchContent() ); + if ( $rt != NULL ) { + return $this->getRedirect( $rt ); + } else { + 
$wgOut = new OutputPage; + $wgOut->setParserOptions( new ParserOptions ); + + $wgArticle->view(); + } + } + + $sk =& $wgUser->getSkin(); + ob_start(); + $sk->outputPage( $wgOut ); + $text = ob_get_contents(); + ob_end_clean(); + + return $text; + } + + function getRedirect( $rt ) { + $url = $rt->escapeLocalURL(); + $text = $rt->getPrefixedText(); + return <<<ENDTEXT +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml"> +<head> + <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" /> + <meta http-equiv="Refresh" content="0;url=$url" /> +</head> +<body> + <p>Redirecting to <a href="$url">$text</a></p> +</body> +</html> +ENDTEXT; + } + + /** Returns image paths used in an XHTML document */ + function findImages( $text ) { + global $wgOutputEncoding, $wgDumpImages; + $parser = xml_parser_create( $wgOutputEncoding ); + xml_set_element_handler( $parser, 'wfDumpStartTagHandler', 'wfDumpEndTagHandler' ); + + $wgDumpImages = array(); + xml_parse( $parser, $text ); + xml_parser_free( $parser ); + + return $wgDumpImages; + } + + /** + * Copy images (or create symlinks) from commons to a static directory. + * This is necessary even if you intend to distribute all of commons, because + * the directory contents is used to work out which image description pages + * are needed. + * + * Also copies math images + * + */ + function copyImages( $images ) { + global $wgSharedUploadPath, $wgSharedUploadDirectory, $wgMathPath, $wgMathDirectory; + # Find shared uploads and copy them into the static directory + $sharedPathLength = strlen( $wgSharedUploadPath ); + $mathPathLength = strlen( $wgMathPath ); + foreach ( $images as $escapedImage => $dummy ) { + $image = urldecode( $escapedImage ); + + # Is it shared? 
+ if ( substr( $image, 0, $sharedPathLength ) == $wgSharedUploadPath ) { + # Reconstruct full filename + $rel = substr( $image, $sharedPathLength + 1 ); // +1 for slash + $sourceLoc = "$wgSharedUploadDirectory/$rel"; + $staticLoc = "{$this->sharedStaticPath}/$rel"; + #print "Copying $sourceLoc to $staticLoc\n"; + # Copy to static directory + if ( !file_exists( $staticLoc ) ) { + wfMkdirParents( dirname( $staticLoc ), 0755 ); + if ( function_exists( 'symlink' ) && !$this->forceCopy ) { + symlink( $sourceLoc, $staticLoc ); + } else { + copy( $sourceLoc, $staticLoc ); + } + } + + if ( substr( $rel, 0, 6 ) == 'thumb/' ) { + # That was a thumbnail + # We will also copy the real image + $parts = explode( '/', $rel ); + $rel = "{$parts[1]}/{$parts[2]}/{$parts[3]}"; + $sourceLoc = "$wgSharedUploadDirectory/$rel"; + $staticLoc = "{$this->sharedStaticPath}/$rel"; + #print "Copying $sourceLoc to $staticLoc\n"; + if ( !file_exists( $staticLoc ) ) { + wfMkdirParents( dirname( $staticLoc ), 0755 ); + if ( function_exists( 'symlink' ) && !$this->forceCopy ) { + symlink( $sourceLoc, $staticLoc ); + } else { + copy( $sourceLoc, $staticLoc ); + } + } + } + } else + # Is it math? + if ( substr( $image, 0, $mathPathLength ) == $wgMathPath ) { + $rel = substr( $image, $mathPathLength + 1 ); // +1 for slash + $source = "$wgMathDirectory/$rel"; + $dest = "{$this->dest}/math/$rel"; + @mkdir( "{$this->dest}/math", 0755 ); + if ( !file_exists( $dest ) ) { + copy( $source, $dest ); + } + } + } + } + + function onGetFullURL( &$title, &$url, $query ) { + global $wgContLang, $wgArticlePath; + + $iw = $title->getInterwiki(); + if ( $title->isExternal() && $wgContLang->getLanguageName( $iw ) ) { + if ( $title->getDBkey() == '' ) { + $url = str_replace( '$1', "../$iw/index.html", $wgArticlePath ); + } else { + $url = str_replace( '$1', "../$iw/" . 
wfUrlencode( $this->getHashedFilename( $title ) ), + $wgArticlePath ); + } + return false; + } else { + return true; + } + } + + function onGetLocalURL( &$title, &$url, $query ) { + global $wgArticlePath; + + if ( $title->isExternal() ) { + # Default is fine for interwiki + return true; + } + + $url = false; + if ( $query != '' ) { + parse_str( $query, $params ); + if ( isset($params['action']) && $params['action'] == 'raw' ) { + if ( isset($params['gen']) && ( $params['gen'] == 'css' || $params['gen'] == 'js' ) ) { # gen may be missing from the query + $file = 'gen.' . $params['gen']; + } else { + $file = $this->getFriendlyName( $title->getPrefixedDBkey() ); + // Clean up Monobook.css etc. + if ( preg_match( '/^(.*)\.(css|js)_[0-9a-f]{4}$/', $file, $matches ) ) { + $file = $matches[1] . '.' . $matches[2]; + } + } + $this->rawPages[$file] = array( $file, $title, $params ); + $url = str_replace( '$1', "raw/" . wfUrlencode( $file ), $wgArticlePath ); + } + } + if ( $url === false ) { + $url = str_replace( '$1', wfUrlencode( $this->getHashedFilename( $title ) ), $wgArticlePath ); + } + + return false; + } + + function getHashedFilename( &$title ) { + if ( '' != $title->getInterwiki() ) { + $dbkey = $title->getDBkey(); + } else { + $dbkey = $title->getPrefixedDBkey(); + } + + $mainPage = Title::newMainPage(); + if ( $mainPage->getPrefixedDBkey() == $dbkey ) { + return 'index.html'; + } + + return $this->getHashedDirectory( $title ) . '/' . + $this->getFriendlyName( $dbkey ) . '.html'; + } + + function getFriendlyName( $name ) { + global $wgLang; + # Replace illegal characters for Windows paths with underscores + $friendlyName = strtr( $name, '/\\*?"<>|~', '_________' ); + + # Work out lower case form. 
We assume we're on a system with case-insensitive + # filenames, so unless the case is of a special form, we have to disambiguate + if ( function_exists( 'mb_strtolower' ) ) { + $lowerCase = $wgLang->ucfirst( mb_strtolower( $name ) ); + } else { + $lowerCase = ucfirst( strtolower( $name ) ); + } + + # Make it mostly unique + if ( $lowerCase != $friendlyName ) { + $friendlyName .= '_' . substr(md5( $name ), 0, 4); + } + # Handle colon specially by replacing it with tilde + # Thus we reduce the number of paths with hashes appended + $friendlyName = str_replace( ':', '~', $friendlyName ); + + return $friendlyName; + } + + /** + * Get a relative directory for putting a title into + */ + function getHashedDirectory( &$title ) { + if ( '' != $title->getInterwiki() ) { + $pdbk = $title->getDBkey(); + } else { + $pdbk = $title->getPrefixedDBkey(); + } + + # Find the first colon if there is one, use characters after it + $p = strpos( $pdbk, ':' ); + if ( $p !== false ) { + $dbk = substr( $pdbk, $p + 1 ); + $dbk = substr( $dbk, strspn( $dbk, '_' ) ); + } else { + $dbk = $pdbk; + } + + # Split into characters + preg_match_all( '/./us', $dbk, $m ); + + $chars = $m[0]; + $length = count( $chars ); + $dir = ''; + + for ( $i = 0; $i < $this->depth; $i++ ) { + if ( $i ) { + $dir .= '/'; + } + if ( $i >= $length ) { + $dir .= '_'; + } else { + $c = $chars[$i]; + if ( ord( $c ) >= 128 || preg_match( '/[a-zA-Z0-9!#$%&()+,[\]^_`{}-]/', $c ) ) { + if ( function_exists( 'mb_strtolower' ) ) { + $dir .= mb_strtolower( $c ); + } else { + $dir .= strtolower( $c ); + } + } else { + $dir .= sprintf( "%02X", ord( $c ) ); + } + } + } + return $dir; + } + +} + +/** XML parser callback */ +function wfDumpStartTagHandler( $parser, $name, $attribs ) { + global $wgDumpImages; + + if ( $name == 'IMG' && isset( $attribs['SRC'] ) ) { + $wgDumpImages[$attribs['SRC']] = true; + } +} + +/** XML parser callback */ +function wfDumpEndTagHandler( $parser, $name ) {} + +# vim: syn=php +?> diff --git 
a/maintenance/dumpHTML.php b/maintenance/dumpHTML.php new file mode 100644 index 00000000..37a46465 --- /dev/null +++ b/maintenance/dumpHTML.php @@ -0,0 +1,131 @@ +<?php +/** + * @todo document + * @package MediaWiki + * @subpackage Maintenance + */ + +/** + * Usage: + * php dumpHTML.php [options...] + * + * -d <dest> destination directory + * -s <start> start ID + * -e <end> end ID + * -k <skin> skin to use (defaults to dumphtml) + * --images only do image description pages + * --categories only do category pages + * --redirects only do redirects + * --special only do miscellaneous stuff + * --force-copy copy commons instead of symlink, needed for Wikimedia + * --interlang allow interlanguage links + */ + + +$optionsWithArgs = array( 's', 'd', 'e', 'k' ); + +$profiling = false; + +if ( $profiling ) { + define( 'MW_CMDLINE_CALLBACK', 'wfSetupDump' ); + function wfSetupDump() { + global $wgProfiling, $wgProfileToDatabase, $wgProfileSampleRate; + $wgProfiling = true; + $wgProfileToDatabase = false; + $wgProfileSampleRate = 1; + } +} + +require_once( "commandLine.inc" ); +require_once( "dumpHTML.inc" ); + +error_reporting( E_ALL & (~E_NOTICE) ); +define( 'CHUNK_SIZE', 50 ); + +if ( !empty( $options['s'] ) ) { + $start = $options['s']; +} else { + $start = 1; +} + +if ( !empty( $options['e'] ) ) { + $end = $options['e']; +} else { + $dbr =& wfGetDB( DB_SLAVE ); + $end = $dbr->selectField( 'page', 'max(page_id)', false ); +} + +if ( !empty( $options['d'] ) ) { + $dest = $options['d']; +} else { + $dest = 'static'; +} + +$skin = isset( $options['k'] ) ? 
$options['k'] : 'dumphtml'; + +$wgHTMLDump = new DumpHTML( array( + 'dest' => $dest, + 'forceCopy' => $options['force-copy'], + 'alternateScriptPath' => $options['interlang'], + 'interwiki' => $options['interlang'], + 'skin' => $skin, +)); + + +if ( $options['special'] ) { + $wgHTMLDump->doSpecials(); +} elseif ( $options['images'] ) { + $wgHTMLDump->doImageDescriptions(); +} elseif ( $options['categories'] ) { + $wgHTMLDump->doCategories(); +} elseif ( $options['redirects'] ) { + $wgHTMLDump->doRedirects(); +} else { + print("Creating static HTML dump in directory $dest. \n". + "Starting from page_id $start of $end.\n"); + + $dbr =& wfGetDB( DB_SLAVE ); + $server = $dbr->getProperty( 'mServer' ); + print "Using database {$server}\n"; + + $wgHTMLDump->doArticles( $start, $end ); + if ( !isset( $options['e'] ) ) { + $wgHTMLDump->doImageDescriptions(); + $wgHTMLDump->doCategories(); + $wgHTMLDump->doSpecials(); + } + + /* + if ( $end - $start > CHUNK_SIZE * 2 ) { + // Split the problem into smaller chunks, run them in different PHP instances + // This is a memory/resource leak workaround + print("Creating static HTML dump in directory $dest. \n". + "Starting from page_id $start of $end.\n"); + + chdir( "maintenance" ); + for ( $chunkStart = $start; $chunkStart < $end; $chunkStart += CHUNK_SIZE ) { + $chunkEnd = $chunkStart + CHUNK_SIZE - 1; + if ( $chunkEnd > $end ) { + $chunkEnd = $end; + } + passthru( "php dumpHTML.php -d " . wfEscapeShellArg( $dest ) . " -s $chunkStart -e $chunkEnd" ); + } + chdir( ".." 
); + $d->doImageDescriptions(); + $d->doCategories(); + $d->doMainPage( $dest ); + } else { + $d->doArticles( $start, $end ); + } + */ +} + +if ( isset( $options['debug'] ) ) { + print_r($GLOBALS); +} + +if ( $profiling ) { + echo $wgProfiler->getOutput(); +} + +?> diff --git a/maintenance/dumpInterwiki.inc b/maintenance/dumpInterwiki.inc new file mode 100644 index 00000000..3cca1e02 --- /dev/null +++ b/maintenance/dumpInterwiki.inc @@ -0,0 +1,219 @@ +<?php +/** + * Rebuild interwiki table using the file on meta and the language list + * Wikimedia specific! + * + * @todo document + * @package MediaWiki + * @subpackage Maintenance + */ + +/** */ + +/** + * @todo document + * @package MediaWiki + * @subpackage Maintenance + */ +class Site { + var $suffix, $lateral, $url; + + function Site( $s, $l, $u ) { + $this->suffix = $s; + $this->lateral = $l; + $this->url = $u; + } + + function getURL( $lang ) { + $xlang = str_replace( '_', '-', $lang ); + return "http://$xlang.{$this->url}/wiki/\$1"; + } +} + +function getRebuildInterwikiDump() { + global $langlist, $languageAliases, $prefixRewrites, $wgDBname; + + # Multi-language sites + # db suffix => db suffix, iw prefix, hostname + $sites = array( + 'wiki' => new Site( 'wiki', 'w', 'wikipedia.org' ), + 'wiktionary' => new Site( 'wiktionary', 'wikt', 'wiktionary.org' ), + 'wikiquote' => new Site( 'wikiquote', 'q', 'wikiquote.org' ), + 'wikibooks' => new Site( 'wikibooks', 'b', 'wikibooks.org' ), + 'wikinews' => new Site( 'wikinews', 'n', 'wikinews.org' ), + 'wikisource' => new Site( 'wikisource', 's', 'wikisource.org' ), + 'wikimedia' => new Site( 'wikimedia', 'chapter', 'wikimedia.org' ), + ); + + # List of language prefixes likely to be found in multi-language sites + $langlist = array_map( "trim", file( "/home/wikipedia/common/langlist" ) ); + + # List of all database names + $dblist = array_map( "trim", file( "/home/wikipedia/common/all.dblist" ) ); + + # Special-case hostnames + $specials = array( + 'sourceswiki' => 
'sources.wikipedia.org', + 'quotewiki' => 'wikiquote.org', + 'textbookwiki' => 'wikibooks.org', + 'sep11wiki' => 'sep11.wikipedia.org', + 'metawiki' => 'meta.wikimedia.org', + 'commonswiki' => 'commons.wikimedia.org', + ); + + # Extra interwiki links that can't be in the intermap for some reason + $extraLinks = array( + array( 'm', 'http://meta.wikimedia.org/wiki/$1', 1 ), + array( 'meta', 'http://meta.wikimedia.org/wiki/$1', 1 ), + array( 'sep11', 'http://sep11.wikipedia.org/wiki/$1', 1 ), + ); + + # Language aliases, usually configured as redirects to the real wiki in apache + # Interlanguage links are made directly to the real wiki + # Something horrible happens if you forget to list an alias here, I can't + # remember what + $languageAliases = array( + 'zh-cn' => 'zh', + 'zh-tw' => 'zh', + 'dk' => 'da', + 'nb' => 'no', + ); + + # Special case prefix rewrites, for the benefit of Swedish which uses s:t + # as an abbreviation for saint + $prefixRewrites = array( + 'svwiki' => array ( 's' => 'src'), + ); + + # Construct a list of reserved prefixes + $reserved = array(); + foreach ( $langlist as $lang ) { + $reserved[$lang] = 1; + } + foreach ( $languageAliases as $alias => $lang ) { + $reserved[$alias] = 1; + } + foreach( $sites as $site ) { + $reserved[$site->lateral] = 1; + } + + # Extract the intermap from meta + $intermap = wfGetHTTP( 'http://meta.wikimedia.org/w/index.php?title=Interwiki_map&action=raw', 30 ); + $lines = array_map( 'trim', explode( "\n", trim( $intermap ) ) ); + + if ( !$lines || count( $lines ) < 2 ) { + wfDie( "m:Interwiki_map not found" ); + } + + $iwArray = array(); + # Global iterwiki map + foreach ( $lines as $line ) { + if ( preg_match( '/^\|\s*(.*?)\s*\|\|\s*(.*?)\s*$/', $line, $matches ) ) { + $prefix = strtolower( $matches[1] ); + $url = $matches[2]; + if ( preg_match( '/(wikipedia|wiktionary|wikisource|wikiquote|wikibooks|wikimedia)\.org/', $url ) ) { + $local = 1; + } else { + $local = 0; + } + + if ( empty( $reserved[$prefix] ) ) 
{ + $imap = array( "iw_prefix" => $prefix, "iw_url" => $url, "iw_local" => $local ); + makeLink ($imap, "__global"); + } + } + } + + # Exclude Wikipedia for Wikipedia + makeLink ( array ('iw_prefix' => 'wikipedia', 'iw_url' => null ), "_wiki" ); + + #Multilanguage sites + foreach ($sites as $site) + makeLanguageLinks ( $site, "_".$site->suffix ); # returns nothing; it emits via makeLink + + + foreach ( $dblist as $db ) { + if ( isset( $specials[$db] ) ) { + # Special wiki + # Has interwiki links and interlanguage links to wikipedia + + makeLink( array( 'iw_prefix' => $db, 'iw_url' => "wiki"), "__sites" ); + # Links to multilanguage sites + foreach ( $sites as $targetSite ) { + makeLink( array( 'iw_prefix' => $targetSite->lateral, + 'iw_url' =>$targetSite->getURL( 'en' ), + 'iw_local' => 1 ), $db ); + } + + } else { + # Find out which site this DB belongs to + $site = false; + foreach( $sites as $candidateSite ) { + $suffix = $candidateSite->suffix; + if ( preg_match( "/(.*)$suffix$/", $db, $matches ) ) { + $site = $candidateSite; + break; + } + } + if ( !$site ) { # reject unknown databases before $site->suffix is read + print "Invalid database $db\n"; + continue; + } + makeLink( array( 'iw_prefix' => $db, 'iw_url' => $site->suffix), "__sites" ); + $lang = $matches[1]; + $host = "$lang." . 
$site->url; + + # Lateral links + foreach ( $sites as $targetSite ) { + if ( $targetSite->suffix != $site->suffix ) { + makeLink( array( 'iw_prefix' => $targetSite->lateral, + 'iw_url' => $targetSite->getURL( $lang ), + 'iw_local' => 1 ), $db ); + } + } + + if ( $site->suffix == "wiki" ) { + makeLink( array('iw_prefix' => 'w', + 'iw_url' => "http://en.wikipedia.org/wiki/$1", + 'iw_local' => 1), $db ); + } + + } + } + foreach ( $extraLinks as $link ) + makeLink( $link, "__global" ); +} + +# ------------------------------------------------------------------------------------------ + +# Emits, via makeLink, all interlanguage links to a particular site (returns nothing) +function makeLanguageLinks( &$site, $source ) { + global $langlist, $languageAliases; + # Actual languages with their own databases + foreach ( $langlist as $targetLang ) { + makeLink( array( $targetLang, $site->getURL( $targetLang ), 1 ), $source ); + } + + # Language aliases + foreach ( $languageAliases as $alias => $lang ) { + makeLink( array( $alias, $site->getURL( $lang ), 1 ), $source ); + } +} + +function makeLink( $entry, $source ) { + global $prefixRewrites, $dbFile; + # Accepts array( prefix, url, local ) or an array keyed by iw_prefix/iw_url/iw_local; + # positional entries are converted first so the prefix rewrite is applied exactly once + if (!array_key_exists("iw_prefix",$entry)) + $entry = array("iw_prefix" => $entry[0], "iw_url" => $entry[1], "iw_local" => $entry[2]); + if ( array_key_exists($source,$prefixRewrites) && + array_key_exists($entry['iw_prefix'],$prefixRewrites[$source])) + $entry['iw_prefix'] = $prefixRewrites[$source][$entry['iw_prefix']]; + if ($dbFile) + dba_insert("{$source}:{$entry['iw_prefix']}", trim("{$entry['iw_local']} {$entry['iw_url']}"),$dbFile); + else + print "{$source}:{$entry['iw_prefix']} {$entry['iw_url']} {$entry['iw_local']}\n"; + + } + +?> diff --git a/maintenance/dumpInterwiki.php b/maintenance/dumpInterwiki.php new file mode 100644 index 00000000..411260ac --- /dev/null 
+++ b/maintenance/dumpInterwiki.php @@ -0,0 +1,25 @@ +<?php +/** + * Rebuild interwiki table using the file on meta and the language list + * Wikimedia specific! + * @todo document + * @package MediaWiki + * @subpackage Maintenance + */ + +/** */ +$oldCwd = getcwd(); + +$optionsWithArgs = array( "o" ); +include_once( "commandLine.inc" ); +include_once( "dumpInterwiki.inc" ); +chdir( $oldCwd ); + +# Output +if ( isset( $options['o'] ) ) { + # To database specified with -o + $dbFile = dba_open( $options['o'], "n", "cdb_make" ); +} + +getRebuildInterwikiDump(); +?> diff --git a/maintenance/dumpLinks.php b/maintenance/dumpLinks.php new file mode 100644 index 00000000..f040f390 --- /dev/null +++ b/maintenance/dumpLinks.php @@ -0,0 +1,63 @@ +<?php +/** + * Copyright (C) 2005 Brion Vibber <brion@pobox.com> + * http://www.mediawiki.org/ + * + * Quick demo hack to generate a plaintext link dump, + * per the proposed wiki link database standard: + * http://www.usemod.com/cgi-bin/mb.pl?LinkDatabase + * + * Includes all (live and broken) intra-wiki links. + * Does not include interwiki or URL links. + * Dumps ASCII text to stdout; command-line. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + * + * @package MediaWiki + * @subpackage SpecialPage + */ + +require_once 'commandLine.inc'; + +$dbr =& wfGetDB( DB_SLAVE ); +$result = $dbr->select( array( 'pagelinks', 'page' ), + array( + 'page_id', + 'page_namespace', + 'page_title', + 'pl_namespace', + 'pl_title' ), + array( 'page_id=pl_from' ), + 'dumpLinks', + array( 'ORDER BY page_id' ) ); + +$lastPage = null; +while( $row = $dbr->fetchObject( $result ) ) { + if( $lastPage != $row->page_id ) { + if( isset( $lastPage ) ) { + print "\n"; + } + $page = Title::makeTitle( $row->page_namespace, $row->page_title ); + print $page->getPrefixedUrl(); + $lastPage = $row->page_id; + } + $link = Title::makeTitle( $row->pl_namespace, $row->pl_title ); + print " " . $link->getPrefixedUrl(); +} +if( isset( $lastPage ) ) + print "\n"; + +?> diff --git a/maintenance/dumpMessages.php b/maintenance/dumpMessages.php new file mode 100644 index 00000000..84ecc4c6 --- /dev/null +++ b/maintenance/dumpMessages.php @@ -0,0 +1,19 @@ +<?php +/** + * @todo document + * @package MediaWiki + * @subpackage Maintenance + */ + +/** */ +require_once( "commandLine.inc" ); +$wgMessageCache->disableTransform(); +$messages = array(); +foreach ( $wgAllMessagesEn as $key => $englishValue ) +{ + $messages[$key] = wfMsg( $key ); +} +print "MediaWiki $wgVersion language file\n"; +print serialize( $messages ); + +?> diff --git a/maintenance/dumpReplayLog.php b/maintenance/dumpReplayLog.php new file mode 100644 index 00000000..aa1d5b9a --- /dev/null +++ b/maintenance/dumpReplayLog.php @@ -0,0 +1,118 @@ +<?php +/** + * @package MediaWiki + * @subpackage Maintenance + */ +error_reporting(E_ALL); + +/** */ +require_once( "commandLine.inc" ); +require_once( 'includes/SpecialExport.php' ); + +/** */ +function dumpReplayLog( $start ) { + $dbw =& wfGetDB( DB_MASTER ); + $recentchanges = $dbw->tableName( 'recentchanges' ); + $result =& $dbw->safeQuery( "SELECT * FROM $recentchanges WHERE rc_timestamp >= " + . 
$dbw->timestamp( $start ) . ' ORDER BY rc_timestamp'); + + global $wgInputEncoding; + echo '<' . '?xml version="1.0" encoding="' . $wgInputEncoding . '" ?' . ">\n"; + echo "<wikilog version='experimental'>\n"; + echo "<!-- Do not use this script for any purpose. It's scary. -->\n"; + while( $row = $dbw->fetchObject( $result ) ) { + echo dumpReplayEntry( $row ); + } + echo "</wikilog>\n"; + $dbw->freeResult( $result ); +} + +/** */ +function dumpReplayEntry( $row ) { + $title = Title::MakeTitle( $row->rc_namespace, $row->rc_title ); + switch( $row->rc_type ) { + case RC_EDIT: + case RC_NEW: + # Edit + $dbr =& wfGetDB( DB_MASTER ); + + $out = " <edit>\n"; + $out .= " <title>" . xmlsafe( $title->getPrefixedText() ) . "</title>\n"; + + # Get previous edit timestamp + if( $row->rc_last_oldid ) { + $s = $dbr->selectRow( 'old', + array( 'old_timestamp' ), + array( 'old_id' => $row->rc_last_oldid ) ); + $out .= " <lastedit>" . wfTimestamp2ISO8601( $s->old_timestamp ) . "</lastedit>\n"; + } else { + $out .= " <newpage/>\n"; + } + + if( $row->rc_this_oldid ) { + $s = $dbr->selectRow( 'old', array( 'old_id as id','old_timestamp as timestamp', + 'old_user as user', 'old_user_text as user_text', 'old_comment as comment', + 'old_text as text', 'old_flags as flags' ), + array( 'old_id' => $row->rc_this_oldid ) ); + $out .= revision2xml( $s, true, false ); + } else { + $s = $dbr->selectRow( 'cur', array( 'cur_id as id','cur_timestamp as timestamp','cur_user as user', + 'cur_user_text as user_text', 'cur_restrictions as restrictions','cur_comment as comment', + 'cur_text as text' ), + array( 'cur_id' => $row->rc_cur_id ) ); + $out .= revision2xml( $s, true, true ); + } + $out .= " </edit>\n"; + break; + case RC_LOG: + $dbr =& wfGetDB( DB_MASTER ); + $s = $dbr->selectRow( 'logging', + array( 'log_type', 'log_action', 'log_timestamp', 'log_user', + 'log_namespace', 'log_title', 'log_comment' ), + array( 'log_timestamp' => $row->rc_timestamp, + 'log_user' => $row->rc_user ) ); + $ts = 
wfTimestamp2ISO8601( $row->rc_timestamp ); + $target = Title::MakeTitle( $s->log_namespace, $s->log_title ); + $out = " <log>\n"; + $out .= " <type>" . xmlsafe( $s->log_type ) . "</type>\n"; + $out .= " <action>" . xmlsafe( $s->log_action ) . "</action>\n"; + $out .= " <timestamp>" . $ts . "</timestamp>\n"; + $out .= " <contributor><username>" . xmlsafe( $row->rc_user_text ) . "</username></contributor>\n"; + $out .= " <target>" . xmlsafe( $target->getPrefixedText() ) . "</target>\n"; + $out .= " <comment>" . xmlsafe( $s->log_comment ) . "</comment>\n"; + $out .= " </log>\n"; + break; + case RC_MOVE: + case RC_MOVE_OVER_REDIRECT: + $target = Title::MakeTitle( $row->rc_moved_to_ns, $row->rc_moved_to_title ); + $out = " <move>\n"; + $out .= " <title>" . xmlsafe( $title->getPrefixedText() ) . "</title>\n"; + $out .= " <target>" . xmlsafe( $target->getPrefixedText() ) . "</target>\n"; + if( $row->rc_type == RC_MOVE_OVER_REDIRECT ) { + $out .= " <override/>\n"; + } + $ts = wfTimestamp2ISO8601( $row->rc_timestamp ); + $out .= " <id>$row->rc_cur_id</id>\n"; + $out .= " <timestamp>$ts</timestamp>\n"; + if($row->rc_user_text) { + $u = "<username>" . xmlsafe( $row->rc_user_text ) . "</username>"; + $u .= "<id>$row->rc_user</id>"; + } else { + $u = "<ip>" . xmlsafe( $row->rc_user_text ) . "</ip>"; + } + $out .= " <contributor>$u</contributor>\n"; + $out .= " </move>\n"; + } + return $out; +} + + +if( isset( $options['start'] ) ) { + $start = wfTimestamp( TS_MW, $options['start'] ); + dumpReplayLog( $start ); +} else { + echo "This is an experimental script to encapsulate data from recent edits.\n"; + echo "Usage: php dumpReplayLog.php --start=20050118032544\n"; +} + +?>
\ No newline at end of file diff --git a/maintenance/dumpTextPass.php b/maintenance/dumpTextPass.php new file mode 100644 index 00000000..78367c0b --- /dev/null +++ b/maintenance/dumpTextPass.php @@ -0,0 +1,347 @@ +<?php +/** + * Copyright (C) 2005 Brion Vibber <brion@pobox.com> + * http://www.mediawiki.org/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @package MediaWiki + * @subpackage SpecialPage + */ + +$originalDir = getcwd(); + +require_once( 'commandLine.inc' ); +require_once( 'SpecialExport.php' ); +require_once( 'maintenance/backup.inc' ); + +/** + * Stream wrapper around 7za filter program. + * Required since we can't pass an open file resource to XMLReader->open() + * which is used for the text prefetch. 
+ */ +class SevenZipStream { + var $stream; + + private function stripPath( $path ) { + $prefix = 'mediawiki.compress.7z://'; + return substr( $path, strlen( $prefix ) ); + } + + function stream_open( $path, $mode, $options, &$opened_path ) { + if( $mode{0} == 'r' ) { + $options = 'e -bd -so'; + } elseif( $mode{0} == 'w' ) { + $options = 'a -bd -si'; + } else { + return false; + } + $arg = wfEscapeShellArg( $this->stripPath( $path ) ); + $command = "7za $options $arg"; + if( !wfIsWindows() ) { + // Suppress the stupid messages on stderr + $command .= ' 2>/dev/null'; + } + $this->stream = popen( $command, $mode ); + return ($this->stream !== false); + } + + function url_stat( $path, $flags ) { + return stat( $this->stripPath( $path ) ); + } + + // This is all so lame; there should be a default class we can extend + + function stream_close() { + return fclose( $this->stream ); + } + + function stream_flush() { + return fflush( $this->stream ); + } + + function stream_read( $count ) { + return fread( $this->stream, $count ); + } + + function stream_write( $data ) { + return fwrite( $this->stream, $data ); + } + + function stream_tell() { + return ftell( $this->stream ); + } + + function stream_eof() { + return feof( $this->stream ); + } + + function stream_seek( $offset, $whence ) { + return fseek( $this->stream, $offset, $whence ); + } +} +stream_wrapper_register( 'mediawiki.compress.7z', 'SevenZipStream' ); + + +class TextPassDumper extends BackupDumper { + var $prefetch = null; + var $input = "php://stdin"; + var $history = MW_EXPORT_FULL; + var $fetchCount = 0; + var $prefetchCount = 0; + + function dump() { + # This shouldn't happen if on console... ;) + header( 'Content-type: text/html; charset=UTF-8' ); + + # Notice messages will foul up your XML output even if they're + # relatively harmless. 
//		ini_set( 'display_errors', false );

		$this->initProgress( $this->history );

		$this->db =& $this->backupDb();

		$this->egress = new ExportProgressFilter( $this->sink, $this );

		$input = fopen( $this->input, "rt" );
		if( $input === false ) {
			// Fail with a clear message instead of feeding a non-stream
			// to readDump().
			wfDie( "Unable to open input {$this->input}\n" );
		}
		$result = $this->readDump( $input );

		if( WikiError::isError( $result ) ) {
			wfDie( $result->getMessage() );
		}

		$this->report( true );
	}

	/**
	 * Handle one command-line option.
	 * 'prefetch' and 'stub' take a <type>:<file> value, which is turned
	 * into a stream URL by processFileOpt().
	 */
	function processOption( $opt, $val, $param ) {
		$url = $this->processFileOpt( $val, $param );

		switch( $opt ) {
		case 'prefetch':
			require_once 'maintenance/backupPrefetch.inc';
			$this->prefetch = new BaseDump( $url );
			break;
		case 'stub':
			$this->input = $url;
			break;
		case 'current':
			$this->history = MW_EXPORT_CURRENT;
			break;
		case 'full':
			$this->history = MW_EXPORT_FULL;
			break;
		}
	}

	/**
	 * Map a file type keyword and file name to a stream URL.
	 * An unrecognised keyword is returned unchanged.
	 */
	function processFileOpt( $val, $param ) {
		switch( $val ) {
		case "file":
			return $param;
		case "gzip":
			return "compress.zlib://$param";
		case "bzip2":
			return "compress.bzip2://$param";
		case "7zip":
			return "mediawiki.compress.7z://$param";
		default:
			return $val;
		}
	}

	/**
	 * Overridden to include prefetch ratio if enabled.
	 */
	function showReport() {
		if( !$this->prefetch ) {
			return parent::showReport();
		}

		if( $this->reporting ) {
			$delta = wfTime() - $this->startTime;
			$now = wfTimestamp( TS_DB );
			$etats = '-';
			$fetchrate = '-';
			if( $delta ) {
				$rate = $this->pageCount / $delta;
				$revrate = $this->revCount / $delta;
				// Guard every divisor: before the first revision is
				// processed revCount and fetchCount are still 0, and
				// maxCount may be 0 as well.
				if( $this->maxCount && $this->revCount ) {
					$portion = $this->revCount / $this->maxCount;
					$eta = $this->startTime + $delta / $portion;
					$etats = wfTimestamp( TS_DB, intval( $eta ) );
				}
				if( $this->fetchCount ) {
					$fetchrate = 100.0 * $this->prefetchCount / $this->fetchCount;
				}
			} else {
				$rate = '-';
				$revrate = '-';
			}
			global $wgDBname;
			$this->progress( sprintf( "%s: %s %d pages (%0.3f/sec), %d revs (%0.3f/sec), %0.1f%% prefetched, ETA %s [max %d]",
				$now, $wgDBname, $this->pageCount, $rate, $this->revCount, $revrate, $fetchrate, $etats, $this->maxCount ) );
		}
	}

	/**
	 * Stream the stub XML from $input through an expat parser in 512 KiB
	 * chunks; the element handlers rebuild the output as they go.
	 *
	 * @return true on success, WikiXmlError on a parse failure
	 */
	function readDump( $input ) {
		$this->buffer = "";
		$this->openElement = false;
		$this->atStart = true;
		$this->state = "";
		$this->lastName = "";
		$this->thisPage = 0;
		$this->thisRev = 0;

		$parser = xml_parser_create( "UTF-8" );
		xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING, false );

		xml_set_element_handler( $parser, array( &$this, 'startElement' ), array( &$this, 'endElement' ) );
		xml_set_character_data_handler( $parser, array( &$this, 'characterData' ) );

		$offset = 0; // for context extraction on error reporting
		$bufferSize = 512 * 1024;
		do {
			$chunk = fread( $input, $bufferSize );
			if( !xml_parse( $parser, $chunk, feof( $input ) ) ) {
				wfDebug( "TextDumpPass::readDump encountered XML parsing error\n" );
				return new WikiXmlError( $parser, 'XML import parse failure', $chunk, $offset );
			}
			$offset += strlen( $chunk );
		} while( $chunk !== false && !feof( $input ) );
		xml_parser_free( $parser );

		return true;
	}

	/**
	 * Fetch the text for a <text id="..."> stub.
	 * A non-blank prefetch hit is returned as is; otherwise the text row is
	 * loaded from the database, stripped of "\r" and UTF-8 normalised.
	 */
	function getText( $id ) {
		$this->fetchCount++;
		if( isset( $this->prefetch ) ) {
			$text = $this->prefetch->prefetch( $this->thisPage, $this->thisRev );
			if( $text === null ) {
				// Entry missing from prefetch dump
			} elseif( $text === "" ) {
				// Blank entries may indicate that the prior dump was broken.
				// To be safe, reload it.
			} else {
				$this->prefetchCount++;
				return $text;
			}
		}
		$id = intval( $id );
		$row = $this->db->selectRow( 'text',
			array( 'old_text', 'old_flags' ),
			array( 'old_id' => $id ),
			'TextPassDumper::getText' );
		$text = Revision::getRevisionText( $row );
		$stripped = str_replace( "\r", "", $text );
		$normalized = UtfNormal::cleanUp( $stripped );
		return $normalized;
	}

	function startElement( $parser, $name, $attribs ) {
		$this->clearOpenElement( null );
		$this->lastName = $name;

		if( $name == 'revision' ) {
			$this->state = $name;
			$this->egress->writeOpenPage( null, $this->buffer );
			$this->buffer = "";
		} elseif( $name == 'page' ) {
			$this->state = $name;
			if( $this->atStart ) {
				$this->egress->writeOpenStream( $this->buffer );
				$this->buffer = "";
				$this->atStart = false;
			}
		}

		if( $name == "text" && isset( $attribs['id'] ) ) {
			// Stubbed text: replace the id reference with the real text.
			$text = $this->getText( $attribs['id'] );
			$this->openElement = array( $name, array( 'xml:space' => 'preserve' ) );
			if( strlen( $text ) > 0 ) {
				$this->characterData( $parser, $text );
			}
		} else {
			$this->openElement = array( $name, $attribs );
		}
	}

	function endElement( $parser, $name ) {
		if( $this->openElement ) {
			// Still pending, so the element had no content.
			$this->clearOpenElement( "" );
		} else {
			$this->buffer .= "</$name>";
		}

		if( $name == 'revision' ) {
			$this->egress->writeRevision( null, $this->buffer );
			$this->buffer = "";
			$this->thisRev = "";
		} elseif( $name == 'page' ) {
			$this->egress->writeClosePage( $this->buffer );
			$this->buffer = "";
			$this->thisPage = "";
		} elseif( $name == 'mediawiki' ) {
			$this->egress->writeCloseStream( $this->buffer );
			$this->buffer = "";
		}
	}

	function characterData( $parser, $data ) {
		$this->clearOpenElement( null );
		// Collect the current page / revision id for the prefetch lookup.
		if( $this->lastName == "id" ) {
			if( $this->state == "revision" ) {
				$this->thisRev .= $data;
			} elseif( $this->state == "page" ) {
				$this->thisPage .= $data;
			}
		}
		$this->buffer .= htmlspecialchars( $data );
	}

	/** Write out the pending opening tag, if there is one. */
	function clearOpenElement( $style ) {
		if( $this->openElement ) {
			$this->buffer .= wfElement( $this->openElement[0], $this->openElement[1], $style );
			$this->openElement = false;
		}
	}
}


$dumper = new TextPassDumper( $argv );

if( true ) {
	$dumper->dump();
} else {
	$dumper->progress( <<<END
This script postprocesses XML dumps from dumpBackup.php to add
page text which was stubbed out (using --stub).

XML input is accepted on stdin.
XML output is sent to stdout; progress reports are sent to stderr.

Usage: php dumpTextPass.php [<options>]
Options:
  --stub=<type>:<file> To load a compressed stub dump instead of stdin
  --prefetch=<type>:<file> Use a prior dump file as a text source, to save
              pressure on the database.
              (Requires PHP 5.0+ and the XMLReader PECL extension)
  --quiet     Don't dump status reports to stderr.
  --report=n  Report position and speed after every n pages processed.
              (Default: 100)
  --server=h  Force reading from MySQL server h
  --current   Base ETA on number of pages in database instead of all revisions
END
);
}

?>
diff --git a/maintenance/duplicatetrans.php b/maintenance/duplicatetrans.php
new file mode 100644
index 00000000..89d9cff1
--- /dev/null
+++ b/maintenance/duplicatetrans.php
@@ -0,0 +1,29 @@
<?php
/**
 * Prints out messages that are the same as the message with the corresponding
 * key in the Language.php file
 *
 * @package MediaWiki
 * @subpackage Maintenance
 */

require_once('commandLine.inc');

if ( 'en' == $wgLanguageCode ) {
	print "Current selected language is English. Cannot check translations.\n";
	exit();
}

$count = $total = 0;
$msgarray = 'wgAllMessages' .
ucfirst( $wgLanguageCode );

foreach ( $$msgarray as $code => $msg ) {
	++$total;
	if ( @$wgAllMessagesEn[$code] == $msg ) {
		echo "* $code\n";
		++$count;
	}
}

echo "{$count} messages of {$total} are duplicates\n";
?>
diff --git a/maintenance/entities2literals.pl b/maintenance/entities2literals.pl
new file mode 100644
index 00000000..dd47f6bb
--- /dev/null
+++ b/maintenance/entities2literals.pl
@@ -0,0 +1,276 @@
#!/usr/bin/env perl
# Takes STDIN and converts hexadecimal, decimal and named HTML
# entities to their respective literals.
#
# Usage: perl entities2literals.pl < file_to_convert [> outfile]
# Reference: http://www.w3.org/TR/REC-html40/sgml/entities.html
# Copyright 2005 Ævar Arnfjörð Bjarmason <avarab@gmail.com> No rights reserved

use encoding 'utf8';
use strict;

# Slurp the whole input. The lines keep their own newlines, so they are joined
# with the empty string. The previous `join /\n/, <>` used the result of a
# pattern match against $_ as the separator.
my $file = join '', <>;

# Numeric entities: decimal first, then hexadecimal.
$file =~ s/&#(\d+);/chr $1/eg;
$file =~ s/&#x([0-9a-fA-F]+);/chr hex $1/eg;

# Named entities: each DATA line is "<code point> <entity name>".
while (<DATA>) {
	chomp;
	my ($number, $entity) = split / +/;
	$file =~ s/&$entity;/chr $number/eg;
}
print $file;

__DATA__
34 quot
38 amp
60 lt
62 gt
160 nbsp
161 iexcl
162 cent
163 pound
164 curren
165 yen
166 brvbar
167 sect
168 uml
169 copy
170 ordf
171 laquo
172 not
173 shy
174 reg
175 macr
176 deg
177 plusmn
178 sup2
179 sup3
180 acute
181 micro
182 para
183 middot
184 cedil
185 sup1
186 ordm
187 raquo
188 frac14
189 frac12
190 frac34
191 iquest
192 Agrave
193 Aacute
194 Acirc
195 Atilde
196 Auml
197 Aring
198 AElig
199 Ccedil
200 Egrave
201 Eacute
202 Ecirc
203 Euml
204 Igrave
205 Iacute
206 Icirc
207 Iuml
208 ETH
209 Ntilde
210 Ograve
211 Oacute
212 Ocirc
213 Otilde
214 Ouml
215 times
216 Oslash
217 Ugrave
218 Uacute
219 Ucirc
220 Uuml
221 Yacute
222 THORN
223 szlig
224 agrave
225 aacute
226 acirc
227 atilde
228 auml
229 aring
230 aelig
231 ccedil
232 egrave
233 eacute
234 ecirc
235 euml
236 igrave
237 iacute
238 icirc
239 iuml
240 eth
241 ntilde
242
ograve +243 oacute +244 ocirc +245 otilde +246 ouml +247 divide +248 oslash +249 ugrave +250 uacute +251 ucirc +252 uuml +253 yacute +254 thorn +255 yuml +338 OElig +339 oelig +352 Scaron +353 scaron +376 Yuml +402 fnof +710 circ +732 tilde +913 Alpha +914 Beta +915 Gamma +916 Delta +917 Epsilon +918 Zeta +919 Eta +920 Theta +921 Iota +922 Kappa +923 Lambda +924 Mu +925 Nu +926 Xi +927 Omicron +928 Pi +929 Rho +931 Sigma +932 Tau +933 Upsilon +934 Phi +935 Chi +936 Psi +937 Omega +945 alpha +946 beta +947 gamma +948 delta +949 epsilon +950 zeta +951 eta +952 theta +953 iota +954 kappa +955 lambda +956 mu +957 nu +958 xi +959 omicron +960 pi +961 rho +962 sigmaf +963 sigma +964 tau +965 upsilon +966 phi +967 chi +968 psi +969 omega +977 thetasym +978 upsih +982 piv +8194 ensp +8195 emsp +8201 thinsp +8204 zwnj +8205 zwj +8206 lrm +8207 rlm +8211 ndash +8212 mdash +8216 lsquo +8217 rsquo +8218 sbquo +8220 ldquo +8221 rdquo +8222 bdquo +8224 dagger +8225 Dagger +8226 bull +8230 hellip +8240 permil +8242 prime +8243 Prime +8249 lsaquo +8250 rsaquo +8254 oline +8260 frasl +8364 euro +8465 image +8472 weierp +8476 real +8482 trade +8501 alefsym +8592 larr +8593 uarr +8594 rarr +8595 darr +8596 harr +8629 crarr +8656 lArr +8657 uArr +8658 rArr +8659 dArr +8660 hArr +8704 forall +8706 part +8707 exist +8709 empty +8711 nabla +8712 isin +8713 notin +8715 ni +8719 prod +8721 sum +8722 minus +8727 lowast +8730 radic +8733 prop +8734 infin +8736 ang +8743 and +8744 or +8745 cap +8746 cup +8747 int +8756 there4 +8764 sim +8773 cong +8776 asymp +8800 ne +8801 equiv +8804 le +8805 ge +8834 sub +8835 sup +8836 nsub +8838 sube +8839 supe +8853 oplus +8855 otimes +8869 perp +8901 sdot +8968 lceil +8969 rceil +8970 lfloor +8971 rfloor +9001 lang +9002 rang +9674 loz +9824 spades +9827 clubs +9829 hearts +9830 diams diff --git a/maintenance/eval.php b/maintenance/eval.php new file mode 100644 index 00000000..4e477f4c --- /dev/null +++ b/maintenance/eval.php @@ -0,0 +1,63 @@ +<?php 
/**
 * PHP lacks an interactive mode, but this can be very helpful when debugging.
 * This script lets a command-line user start up the wiki engine and then poke
 * about by issuing PHP commands directly.
 *
 * Unlike eg Python, you need to use a 'return' statement explicitly for the
 * interactive shell to print out the value of the expression. Multiple lines
 * are evaluated separately, so blocks need to be input without a line break.
 * Fatal errors such as use of undeclared functions can kill the shell.
 *
 * To get decent line editing behavior, you should compile PHP with support
 * for GNU readline (pass --with-readline to configure).
 *
 * @package MediaWiki
 * @subpackage Maintenance
 */

# Behaviour switches are taken from the environment before the engine loads.
$wgForceLoadBalancing = (getenv('MW_BALANCE') ? true : false);
$wgUseNormalUser = (getenv('MW_WIKIUSER') ? true : false);
if (getenv('MW_PROFILING')) {
	define('MW_CMDLINE_CALLBACK', 'wfSetProfiling');
}
/** Callback named by MW_CMDLINE_CALLBACK above; switches profiling on. */
function wfSetProfiling() { $GLOBALS['wgProfiling'] = true; }

$optionsWithArgs = array( 'd' );

/** */
require_once( "commandLine.inc" );

# -d <level>: 1 sends the debug log to stdout, 2 additionally sets DBO_DEBUG
# on every configured server, 3 additionally sets $wgDebugFunctionEntry.
if ( isset( $options['d'] ) ) {
	$d = $options['d'];
	if ( $d > 0 ) {
		$wgDebugLogFile = '/dev/stdout';
	}
	if ( $d > 1 ) {
		foreach ( $wgLoadBalancer->mServers as $i => $server ) {
			$wgLoadBalancer->mServers[$i]['flags'] |= DBO_DEBUG;
		}
	}
	if ( $d > 2 ) {
		$wgDebugFunctionEntry = true;
	}
}


# Read-eval-print loop: each input line is evaluated as one PHP statement.
while ( ( $line = readconsole( '> ' ) ) !== false ) {
	$val = eval( $line . ";" );
	if( is_null( $val ) ) {
		# No 'return' in the evaluated code; print an empty line.
		echo "\n";
	} elseif( is_string( $val ) || is_numeric( $val ) ) {
		echo "$val\n";
	} else {
		var_dump( $val );
	}
	if ( function_exists( "readline_add_history" ) ) {
		readline_add_history( $line );
	}
}

print "\n";

?>
diff --git a/maintenance/fetchInterwiki.pl b/maintenance/fetchInterwiki.pl
new file mode 100644
index 00000000..cb56a6df
--- /dev/null
+++ b/maintenance/fetchInterwiki.pl
@@ -0,0 +1,102 @@
#!/usr/bin/env perl
# Copyright (C) 2005 Ævar Arnfjörð Bjarmason
use strict;
use warnings;
use Socket;

# Conf
# Download the usemod intermap; url() splits the URL into host and path
# for get().
my $map = &get(&url('http://usemod.com/intermap.txt'));

# --- #
# $cont accumulates the text of the generated Interwiki.php file.
my $cont;
my @map = split /\n/, $map;

$cont .= '<?php
# Note: this file is generated by maintenance/fetchInterwiki.pl
# Edit and rerun that script rather than modifying this directly.

/* private */ $wgValidInterwikis = array(
';

$cont .= "\t# The usemod interwiki map\n";
# Each intermap line is "<name> <url>"; the URL gets a $1 placeholder appended.
for (my $i=0;$i<=$#map;++$i) {
	my ($name, $url) = $map[$i] =~ m#^([^ ]+) (.+)#i;
	$cont .= "\t'$name' => '$url\$1',\n";
}

# Language prefixes; "prefix:target" maps a prefix onto another language's
# subdomain.
my @iso = qw(
aa ab af als am ar as ay az ba be bg bh bi bn bo bs ca chr co cs csb cy da de dk:da dz el en eo
es et eu fa fi fj fo fr fy ga gd gl gn gu gv ha he hi hr hu hy ia id ik io is it iu ja jv ka kk
kl km kn ko ks ku ky la lo lt lv mg mi mk ml mn mo mr ms my na nah nb nds ne nl no oc om or pa
pl ps pt qu rm rn ro ru rw sa sd sg sh si sk sl sm sn so sq sr ss st su sv sw ta te tg th ti tk
tl tn to tp tpi tr ts tt tw ug uk ur uz vi vo wa wo xh yi yo za zh zh-cn zh-tw zu);

$cont .= '
	# Some custom additions:
	"ReVo" => "http://purl.org/NET/voko/revo/art/$1.html",
	# eg [[ReVo:cerami]], [[ReVo:astero]] - note X-sensitive!
	"EcheI" => "http://www.ikso.net/cgi-bin/wiki.pl?$1",
	"E\\xc4\\x89eI" => "http://www.ikso.net/cgi-bin/wiki.pl?$1",
	"UnuMondo" => "http://unumondo.com/cgi-bin/wiki.pl?$1", # X-sensitive!
+ "JEFO" => "http://esperanto.jeunes.free.fr/vikio/index.php?$1", + "PMEG" => "http://www.bertilow.com/pmeg/$1.php", + # ekz [[PMEG:gramatiko/kunligaj vortetoj/au]] + "EnciclopediaLibre" => "http://enciclopedia.us.es/wiki.phtml?title=$1", + + # Wikipedia-specific stuff: + # Special cases + "w" => "http://www.wikipedia.org/wiki/$1", + "m" => "http://meta.wikipedia.org/wiki/$1", + "meta" => "http://meta.wikipedia.org/wiki/$1", + "sep11" => "http://sep11.wikipedia.org/wiki/$1", + "simple"=> "http://simple.wikipedia.com/wiki.cgi?$1", + "wiktionary" => "http://wiktionary.wikipedia.org/wiki/$1", + "PageHistory" => "http://www.wikipedia.org/w/wiki.phtml?title=$1&action=history", + "UserContributions" => "http://www.wikipedia.org/w/wiki.phtml?title=Special:Contributions&target=$1", + "BackLinks" => "http://www.wikipedia.org/w/wiki.phtml?title=Special:Whatlinkshere&target=$1", + + # ISO 639 2-letter language codes +'; + +for(my $i=0; $i<=$#iso;++$i) { + my @arr = split /:/, $iso[$i]; + $cont .= "\t"; + $cont .= "'$arr[0]' => 'http://"; + + if ($arr[1]) { + $cont .= $arr[1]; + } else { + $cont .= $arr[0]; + } + $cont .= ".wikipedia.org/wiki/\$1',\n"; +} + +$cont .= ' +); +?> +'; + +open IW, ">Interwiki.php"; +print IW $cont; +close IW; + +sub get { + my ($host, $url) = @_; + my $cont; + my $eat; + + my $proto = getprotobyname('tcp'); + socket(Socket, AF_INET, SOCK_STREAM, $proto); + my $iaddr = inet_aton("$host"); + my $port = getservbyname('http', 'tcp'); + my $sin = sockaddr_in($port, $iaddr); + connect(Socket, $sin); + send Socket, "GET $url HTTP/1.0\r\nHost: $host\r\n\r\n",0; + while (<Socket>) { + $cont .= $_ if $eat; # mmm, food + ++$eat if ($_ =~ /^(\n|\r\n|)$/); + } + return $cont; +} + +sub url {my ($server, $path) = $_[0] =~ m#.*(?=//)//([^/]*)(.*)#g;} diff --git a/maintenance/findhooks.php b/maintenance/findhooks.php new file mode 100644 index 00000000..4f446f2b --- /dev/null +++ b/maintenance/findhooks.php @@ -0,0 +1,93 @@ +<?php +/** + * Simple script that try 
to find documented hook and hooks actually
 * in the code and show what's missing.
 *
 * This script assumes that:
 * - hooks names in hooks.txt are at the beginning of a line and single quoted.
 * - hooks names in code are the first parameter of wfRunHooks.
 *
 * @package MediaWiki
 * @subpackage Maintenance
 *
 * @author Ashar Voultoiz <hashar@altern.org>
 * @copyright Copyright © Ashar voultoiz
 * @license http://www.gnu.org/copyleft/gpl.html GNU General Public Licence 2.0 or later
 */

/** This is a command line script*/
include('commandLine.inc');


# GLOBALS

$doc = $IP . '/docs/hooks.txt';
$pathinc = $IP . '/includes/';


# FUNCTIONS

/**
 * Collect the hook names listed in the documentation file named by the
 * global $doc: every single-quoted string that starts a line.
 * @return array of documented hooks
 */
function getHooksFromDoc() {
	global $doc;
	$matches = array();
	preg_match_all( "/\n'(.*?)'/", file_get_contents( $doc ), $matches );
	return $matches[1];
}

/**
 * Collect the hook names used in one PHP file: the single-quoted first
 * argument of each wfRunHooks( call.
 * @param $file Full filename to the PHP file.
 * @return array of hooks found.
 */
function getHooksFromFile( $file ) {
	$source = file_get_contents( $file );
	$matches = array();
	preg_match_all( "/wfRunHooks\(\s*\'(.*?)\'/", $source, $matches );
	return $matches[1];
}

/**
 * Get hooks from the source code.
 * @param $path Directory where the include files can be found
 * @return array of hooks found.
+ */ +function getHooksFromPath( $path ) { + $hooks = array(); + if( $dh = opendir($path) ) { + while(($file = readdir($dh)) !== false) { + if( filetype($path.$file) == 'file' ) { + $hooks = array_merge( $hooks, getHooksFromFile($path.$file) ); + } + } + closedir($dh); + } + return $hooks; +} + +/** + * Nicely output the array + * @param $msg A message to show before the value + * @param $arr An array + * @param $sort Boolean : wheter to sort the array (Default: true) + */ +function printArray( $msg, $arr, $sort = true ) { + if($sort) asort($arr); + foreach($arr as $v) print "$msg: $v\n"; +} + + +# MAIN + +$documented = getHooksFromDoc($doc); +$potential = getHooksFromPath($pathinc); + +$todo = array_diff($potential, $documented); +$deprecated = array_diff($documented, $potential); + +// let's show the results: +printArray('undocumented', $todo ); +printArray('not found', $deprecated ); + +?> diff --git a/maintenance/fixSlaveDesync.php b/maintenance/fixSlaveDesync.php new file mode 100644 index 00000000..e97f96c9 --- /dev/null +++ b/maintenance/fixSlaveDesync.php @@ -0,0 +1,100 @@ +<?php + +$wgUseRootUser = true; +require_once( 'commandLine.inc' ); + +//$wgDebugLogFile = '/dev/stdout'; + +$slaveIndexes = array(); +for ( $i = 1; $i < count( $wgDBservers ); $i++ ) { + if ( $wgLoadBalancer->isNonZeroLoad( $i ) ) { + $slaveIndexes[] = $i; + } +} +/* +foreach ( $wgLoadBalancer->mServers as $i => $server ) { + $wgLoadBalancer->mServers[$i]['flags'] |= DBO_DEBUG; +}*/ +$reportingInterval = 1000; + +if ( isset( $args[0] ) ) { + desyncFixPage( $args[0] ); +} else { + $dbw =& wfGetDB( DB_MASTER ); + $maxPage = $dbw->selectField( 'page', 'MAX(page_id)', false, 'fixDesync.php' ); + for ( $i=1; $i <= $maxPage; $i++ ) { + desyncFixPage( $i ); + if ( !($i % $reportingInterval) ) { + print "$i\n"; + } + } +} + +function desyncFixPage( $pageID ) { + global $slaveIndexes; + $fname = 'desyncFixPage'; + + # Check for a corrupted page_latest + $dbw =& wfGetDB( DB_MASTER ); + 
$realLatest = $dbw->selectField( 'page', 'page_latest', array( 'page_id' => $pageID ), $fname ); + $found = false; + foreach ( $slaveIndexes as $i ) { + $db =& wfGetDB( $i ); + $latest = $db->selectField( 'page', 'page_latest', array( 'page_id' => $pageID ), $fname ); + $max = $db->selectField( 'revision', 'MAX(rev_id)', false, $fname ); + if ( $latest != $realLatest && $realLatest < $max ) { + print "page_latest corrupted in page $pageID, server $i\n"; + $found = true; + break; + } + } + if ( !$found ) { + return; + } + + # Find the missing revision + $res = $dbw->select( 'revision', array( 'rev_id' ), array( 'rev_page' => $pageID ), $fname ); + $masterIDs = array(); + while ( $row = $dbw->fetchObject( $res ) ) { + $masterIDs[] = $row->rev_id; + } + $dbw->freeResult( $res ); + + $res = $db->select( 'revision', array( 'rev_id' ), array( 'rev_page' => $pageID ), $fname ); + $slaveIDs = array(); + while ( $row = $db->fetchObject( $res ) ) { + $slaveIDs[] = $row->rev_id; + } + $db->freeResult( $res ); + $missingIDs = array_diff( $masterIDs, $slaveIDs ); + + if ( count( $missingIDs ) ) { + print "Found " . count( $missingIDs ) . " missing revision(s), copying from master... "; + foreach ( $missingIDs as $rid ) { + print "$rid "; + # Revision + $row = $dbw->selectRow( 'revision', '*', array( 'rev_id' => $rid ), $fname ); + foreach ( $slaveIndexes as $i ) { + $db =& wfGetDB( $i ); + $db->insert( 'revision', get_object_vars( $row ), $fname, 'IGNORE' ); + } + + # Text + $row = $dbw->selectRow( 'text', '*', array( 'old_id' => $row->rev_text_id ), $fname ); + foreach ( $slaveIndexes as $i ) { + $db =& wfGetDB( $i ); + $db->insert( 'text', get_object_vars( $row ), $fname, 'IGNORE' ); + } + } + print "done\n"; + } + + print "Fixing page_latest... 
"; + foreach ( $slaveIndexes as $i ) { + $db =& wfGetDB( $i ); + $db->update( 'page', array( 'page_latest' => $realLatest ), array( 'page_id' => $pageID ), $fname ); + } + print "done\n"; +} + +?> diff --git a/maintenance/fixTimestamps.php b/maintenance/fixTimestamps.php new file mode 100644 index 00000000..784e35cd --- /dev/null +++ b/maintenance/fixTimestamps.php @@ -0,0 +1,104 @@ +<?php + +/** + * This script fixes timestamp corruption caused by one or more webservers + * temporarily being set to the wrong time. The time offset must be known and + * consistent. Start and end times (in 14-character format) restrict the search, + * and must bracket the damage. There must be a majority of good timestamps in the + * search period. + */ + +require_once( 'commandLine.inc' ); + +if ( count( $args ) < 3 ) { + echo "Usage: php fixTimestamps.php <offset in hours> <start time> <end time>\n"; + exit(1); +} + +$offset = $args[0] * 3600; +$start = $args[1]; +$end = $args[2]; +$fname = 'fixTimestamps.php'; +$grace = 60; // maximum normal clock offset + +# Find bounding revision IDs +$dbw =& wfGetDB( DB_MASTER ); +$revisionTable = $dbw->tableName( 'revision' ); +$res = $dbw->query( "SELECT MIN(rev_id) as minrev, MAX(rev_id) as maxrev FROM $revisionTable " . + "WHERE rev_timestamp BETWEEN '{$start}' AND '{$end}'", $fname ); +$row = $dbw->fetchObject( $res ); + +if ( is_null( $row->minrev ) ) { + echo "No revisions in search period.\n"; + exit(0); +} + +$minRev = $row->minrev; +$maxRev = $row->maxrev; + +# Select all timestamps and IDs +$sql = "SELECT rev_id, rev_timestamp FROM $revisionTable " . 
+ "WHERE rev_id BETWEEN $minRev AND $maxRev"; +if ( $offset > 0 ) { + $sql .= " ORDER BY rev_id DESC"; + $expectedSign = -1; +} else { + $expectedSign = 1; +} + +$res = $dbw->query( $sql, $fname ); + +$lastNormal = 0; +$badRevs = array(); +$numGoodRevs = 0; + +while ( $row = $dbw->fetchObject( $res ) ) { + $timestamp = wfTimestamp( TS_UNIX, $row->rev_timestamp ); + $delta = $timestamp - $lastNormal; + $sign = $delta == 0 ? 0 : $delta / abs( $delta ); + if ( $sign == 0 || $sign == $expectedSign ) { + // Monotonic change + $lastNormal = $timestamp; + ++ $numGoodRevs; + continue; + } elseif ( abs( $delta ) <= $grace ) { + // Non-monotonic change within grace interval + ++ $numGoodRevs; + continue; + } else { + // Non-monotonic change larger than grace interval + $badRevs[] = $row->rev_id; + } +} +$dbw->freeResult( $res ); + +$numBadRevs = count( $badRevs ); +if ( $numBadRevs > $numGoodRevs ) { + echo +"The majority of revisions in the search interval are marked as bad. + +Are you sure the offset ($offset) has the right sign? Positive means the clock +was incorrectly set forward, negative means the clock was incorrectly set back. + +If the offset is right, then increase the search interval until there are enough +good revisions to provide a majority reference. +"; + + exit(1); +} elseif ( $numBadRevs == 0 ) { + echo "No bad revisions found.\n"; + exit(0); +} + +printf( "Fixing %d revisions (%.2f%% of revisions in search interval)\n", + $numBadRevs, $numBadRevs / ($numGoodRevs + $numBadRevs) * 100 ); + +$fixup = -$offset; +$sql = "UPDATE $revisionTable " . + "SET rev_timestamp=DATE_FORMAT(DATE_ADD(rev_timestamp, INTERVAL $fixup SECOND), '%Y%m%d%H%i%s') " . + "WHERE rev_id IN (" . $dbw->makeList( $badRevs ) . 
')'; +//echo "$sql\n"; +$dbw->query( $sql, $fname ); +echo "Done\n"; + +?> diff --git a/maintenance/fixUserRegistration.php b/maintenance/fixUserRegistration.php new file mode 100644 index 00000000..af8a68c2 --- /dev/null +++ b/maintenance/fixUserRegistration.php @@ -0,0 +1,31 @@ +<?php +/** + * Fix the user_registration field. + * In particular, for values which are NULL, set them to the date of the first edit + */ + +require_once( 'commandLine.inc' ); + +$fname = 'fixUserRegistration.php'; + +$dbr =& wfGetDB( DB_SLAVE ); +$dbw =& wfGetDB( DB_MASTER ); + +// Get user IDs which need fixing +$res = $dbr->select( 'user', 'user_id', 'user_registration IS NULL', $fname ); + +while ( $row = $dbr->fetchObject( $res ) ) { + $id = $row->user_id; + // Get first edit time + $timestamp = $dbr->selectField( 'revision', 'MIN(rev_timestamp)', array( 'rev_user' => $id ), $fname ); + // Update + if ( !empty( $timestamp ) ) { + $dbw->update( 'user', array( 'user_registration' => $timestamp ), array( 'user_id' => $id ), $fname ); + print "$id $timestamp\n"; + } else { + print "$id NULL\n"; + } +} +print "\n"; + +?> diff --git a/maintenance/generateSitemap.php b/maintenance/generateSitemap.php new file mode 100644 index 00000000..2cf8312a --- /dev/null +++ b/maintenance/generateSitemap.php @@ -0,0 +1,463 @@ +<?php +define( 'GS_MAIN', -2 ); +define( 'GS_TALK', -1 ); +/** + * Creates a Google sitemap for the site + * + * @package MediaWiki + * @subpackage Maintenance + * + * @copyright Copyright © 2005, Ævar Arnfjörð Bjarmason + * @copyright Copyright © 2005, Jens Frank <jeluf@gmx.de> + * @copyright Copyright © 2005, Brion Vibber <brion@pobox.com> + * + * @link http://www.google.com/webmasters/sitemaps/docs/en/about.html + * @link http://www.google.com/schemas/sitemap/0.84/sitemap.xsd + * + * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later + */ + +class GenerateSitemap { + /** + * The maximum amount of urls in a sitemap file + * + * @link 
http://www.google.com/schemas/sitemap/0.84/sitemap.xsd + * + * @var int + */ + var $url_limit; + + /** + * The maximum size of a sitemap file + * + * @link http://www.google.com/webmasters/sitemaps/docs/en/protocol.html#faq_sitemap_size + * + * @var int + */ + var $size_limit; + + /** + * The path to prepend to the filename + * + * @var string + */ + var $fspath; + + /** + * The path to append to the domain name + * + * @var string + */ + var $path; + + /** + * Whether or not to use compression + * + * @var bool + */ + var $compress; + + /** + * The number of entries to save in each sitemap file + * + * @var array + */ + var $limit = array(); + + /** + * Key => value entries of namespaces and their priorities + * + * @var array + */ + var $priorities = array( + // Custom main namespaces + GS_MAIN => '0.5', + // Custom talk namesspaces + GS_TALK => '0.1', + // MediaWiki standard namespaces + NS_MAIN => '1.0', + NS_TALK => '0.1', + NS_USER => '0.5', + NS_USER_TALK => '0.1', + NS_PROJECT => '0.5', + NS_PROJECT_TALK => '0.1', + NS_IMAGE => '0.5', + NS_IMAGE_TALK => '0.1', + NS_MEDIAWIKI => '0.0', + NS_MEDIAWIKI_TALK => '0.1', + NS_TEMPLATE => '0.0', + NS_TEMPLATE_TALK => '0.1', + NS_HELP => '0.5', + NS_HELP_TALK => '0.1', + NS_CATEGORY => '0.5', + NS_CATEGORY_TALK => '0.1', + ); + + /** + * A one-dimensional array of namespaces in the wiki + * + * @var array + */ + var $namespaces = array(); + + /** + * When this sitemap batch was generated + * + * @var string + */ + var $timestamp; + + /** + * A database slave object + * + * @var object + */ + var $dbr; + + /** + * A resource pointing to the sitemap index file + * + * @var resource + */ + var $findex; + + + /** + * A resource pointing to a sitemap file + * + * @var resource + */ + var $file; + + /** + * A resource pointing to php://stderr + * + * @var resource + */ + var $stderr; + + /** + * Constructor + * + * @param string $fspath The path to prepend to the filenames, used to + * save them somewhere else than in the 
root directory + * @param string $path The path to append to the domain name + * @param bool $compress Whether to compress the sitemap files + */ + function GenerateSitemap( $fspath, $path, $compress ) { + global $wgDBname, $wgScriptPath; + + $this->url_limit = 50000; + $this->size_limit = pow( 2, 20 ) * 10; + $this->fspath = isset( $fspath ) ? $fspath : ''; + $this->path = isset( $path ) ? $path : $wgScriptPath; + $this->compress = $compress; + + $this->stderr = fopen( 'php://stderr', 'wt' ); + $this->dbr =& wfGetDB( DB_SLAVE ); + $this->generateNamespaces(); + $this->timestamp = wfTimestamp( TS_ISO_8601, wfTimestampNow() ); + $this->findex = fopen( "{$this->fspath}sitemap-index-$wgDBname.xml", 'wb' ); + } + + /** + * Generate a one-dimensional array of existing namespaces + */ + function generateNamespaces() { + $fname = 'GenerateSitemap::generateNamespaces'; + + $res = $this->dbr->select( 'page', + array( 'page_namespace' ), + array(), + $fname, + array( + 'GROUP BY' => 'page_namespace', + 'ORDER BY' => 'page_namespace', + ) + ); + + while ( $row = $this->dbr->fetchObject( $res ) ) + $this->namespaces[] = $row->page_namespace; + } + + /** + * Get the priority of a given namespace + * + * @param int $namespace The namespace to get the priority for + + + * @return string + */ + + function priority( $namespace ) { + return isset( $this->priorities[$namespace] ) ? $this->priorities[$namespace] : $this->guessPriority( $namespace ); + } + + /** + * If the namespace isn't listed on the priority list return the + * default priority for the namespace, varies depending on whether it's + * a talkpage or not. + * + * @param int $namespace The namespace to get the priority for + * + * @return string + */ + function guessPriority( $namespace ) { + return Namespace::isMain( $namespace ) ? 
$this->priorities[GS_MAIN] : $this->priorities[GS_TALK]; + } + + /** + * Return a database resolution of all the pages in a given namespace + * + * @param int $namespace Limit the query to this namespace + * + * @return resource + */ + function getPageRes( $namespace ) { + $fname = 'GenerateSitemap::getPageRes'; + + return $this->dbr->select( 'page', + array( + 'page_namespace', + 'page_title', + 'page_touched', + ), + array( 'page_namespace' => $namespace ), + $fname + ); + } + + /** + * Main loop + * + * @access public + */ + function main() { + global $wgDBname, $wgContLang; + + fwrite( $this->findex, $this->openIndex() ); + + foreach ( $this->namespaces as $namespace ) { + $res = $this->getPageRes( $namespace ); + $this->file = false; + $this->generateLimit( $namespace ); + $length = $this->limit[0]; + $i = $smcount = 0; + + $fns = $wgContLang->getFormattedNsText( $namespace ); + $this->debug( "$namespace ($fns)" ); + while ( $row = $this->dbr->fetchObject( $res ) ) { + if ( $i++ === 0 || $i === $this->url_limit + 1 || $length + $this->limit[1] + $this->limit[2] > $this->size_limit ) { + if ( $this->file !== false ) { + $this->write( $this->file, $this->closeFile() ); + $this->close( $this->file ); + } + $filename = $this->sitemapFilename( $namespace, $smcount++ ); + $this->file = $this->open( $this->fspath . 
$filename, 'wb' ); + $this->write( $this->file, $this->openFile() ); + fwrite( $this->findex, $this->indexEntry( $filename ) ); + $this->debug( "\t$filename" ); + $length = $this->limit[0]; + $i = 1; + } + $title = Title::makeTitle( $row->page_namespace, $row->page_title ); + $date = wfTimestamp( TS_ISO_8601, $row->page_touched ); + $entry = $this->fileEntry( $title->getFullURL(), $date, $this->priority( $namespace ) ); + $length += strlen( $entry ); + $this->write( $this->file, $entry ); + } + if ( $this->file ) { + $this->write( $this->file, $this->closeFile() ); + $this->close( $this->file ); + } + } + fwrite( $this->findex, $this->closeIndex() ); + fclose( $this->findex ); + } + + /** + * gzopen() / fopen() wrapper + * + * @return resource + */ + function open( $file, $flags ) { + return $this->compress ? gzopen( $file, $flags ) : fopen( $file, $flags ); + } + + /** + * gzwrite() / fwrite() wrapper + */ + function write( &$handle, $str ) { + if ( $this->compress ) + gzwrite( $handle, $str ); + else + fwrite( $handle, $str ); + } + + /** + * gzclose() / fclose() wrapper + */ + function close( &$handle ) { + if ( $this->compress ) + gzclose( $handle ); + else + fclose( $handle ); + } + + /** + * Get a sitemap filename + * + * @static + * + * @param int $namespace The namespace + * @param int $count The count + * + * @return string + */ + function sitemapFilename( $namespace, $count ) { + global $wgDBname; + + $ext = $this->compress ? '.gz' : ''; + + return "sitemap-$wgDBname-NS_$namespace-$count.xml$ext"; + } + + /** + * Return the XML required to open an XML file + * + * @static + * + * @return string + */ + function xmlHead() { + return '<?xml version="1.0" encoding="UTF-8"?>' . 
"\n"; + } + + /** + * Return the XML schema being used + * + * @static + * + * @returns string + */ + function xmlSchema() { + return 'http://www.google.com/schemas/sitemap/0.84'; + } + + /** + * Return the XML required to open a sitemap index file + * + * @return string + */ + function openIndex() { + return $this->xmlHead() . '<sitemapindex xmlns="' . $this->xmlSchema() . '">' . "\n"; + } + + /** + * Return the XML for a single sitemap indexfile entry + * + * @static + * + * @param string $filename The filename of the sitemap file + * + * @return string + */ + function indexEntry( $filename ) { + return + "\t<sitemap>\n" . + "\t\t<loc>$filename</loc>\n" . + "\t\t<lastmod>{$this->timestamp}</lastmod>\n" . + "\t</sitemap>\n"; + } + + /** + * Return the XML required to close a sitemap index file + * + * @static + * + * @return string + */ + function closeIndex() { + return "</sitemapindex>\n"; + } + + /** + * Return the XML required to open a sitemap file + * + * @return string + */ + function openFile() { + return $this->xmlHead() . '<urlset xmlns="' . $this->xmlSchema() . '">' . "\n"; + } + + /** + * Return the XML for a single sitemap entry + * + * @static + * + * @param string $url An RFC 2396 compilant URL + * @param string $date A ISO 8601 date + * @param string $priority A priority indicator, 0.0 - 1.0 inclusive with a 0.1 stepsize + * + * @return string + */ + function fileEntry( $url, $date, $priority ) { + return + "\t<url>\n" . + "\t\t<loc>$url</loc>\n" . + "\t\t<lastmod>$date</lastmod>\n" . + "\t\t<priority>$priority</priority>\n" . 
+ "\t</url>\n"; + } + + /** + * Return the XML required to close sitemap file + * + * @static + * @return string + */ + function closeFile() { + return "</urlset>\n"; + } + + /** + * Write a string to stderr followed by a UNIX newline + */ + function debug( $str ) { + fwrite( $this->stderr, "$str\n" ); + } + + /** + * Populate $this->limit + */ + function generateLimit( $namespace ) { + $title = Title::makeTitle( $namespace, str_repeat( "\xf0\xa8\xae\x81", 63 ) . "\xe5\x96\x83" ); + + $this->limit = array( + strlen( $this->openFile() ), + strlen( $this->fileEntry( $title->getFullUrl(), wfTimestamp( TS_ISO_8601, wfTimestamp() ), $this->priority( $namespace ) ) ), + strlen( $this->closeFile() ) + ); + } +} + +if ( in_array( '--help', $argv ) ) { + echo + "Usage: php generateSitemap.php [host] [options]\n" . + "\thost = hostname\n" . + "\toptions:\n" . + "\t\t--help\tshow this message\n" . + "\t\t--fspath\tThe file system path to save to, e.g /tmp/sitemap/\n" . + "\t\t--path\tThe http path to use, e.g. /wiki\n" . + "\t\t--compress=[yes|no]\tcompress the sitemap files, default yes\n"; + die( -1 ); +} + +if ( isset( $argv[1] ) && strpos( $argv[1], '--' ) !== 0 ) + $_SERVER['SERVER_NAME'] = $argv[1]; + +$optionsWithArgs = array( 'fspath', 'path', 'compress' ); +require_once 'commandLine.inc'; + +$gs = new GenerateSitemap( @$options['fspath'], @$options['path'], @$options['compress'] !== 'no' ); +$gs->main(); +?> diff --git a/maintenance/importDump.php b/maintenance/importDump.php new file mode 100644 index 00000000..1bca3296 --- /dev/null +++ b/maintenance/importDump.php @@ -0,0 +1,141 @@ +<?php +/** + * Copyright (C) 2005 Brion Vibber <brion@pobox.com> + * http://www.mediawiki.org/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @package MediaWiki + * @subpackage Maintenance + */ + +$optionsWithArgs = array( 'report' ); + +require_once( 'commandLine.inc' ); +require_once( 'SpecialImport.php' ); + +class BackupReader { + var $reportingInterval = 100; + var $reporting = true; + var $pageCount = 0; + var $revCount = 0; + var $dryRun = false; + + function BackupReader() { + $this->stderr = fopen( "php://stderr", "wt" ); + } + + function reportPage( $page ) { + $this->pageCount++; + } + + function handleRevision( $rev ) { + $title = $rev->getTitle(); + if (!$title) { + $this->progress( "Got bogus revision with null title!" ); + return; + } + $display = $title->getPrefixedText(); + $timestamp = $rev->getTimestamp(); + #echo "$display $timestamp\n"; + + $this->revCount++; + $this->report(); + + if( !$this->dryRun ) { + call_user_func( $this->importCallback, $rev ); + } + } + + function report( $final = false ) { + if( $final xor ( $this->pageCount % $this->reportingInterval == 0 ) ) { + $this->showReport(); + } + } + + function showReport() { + if( $this->reporting ) { + $delta = wfTime() - $this->startTime; + if( $delta ) { + $rate = $this->pageCount / $delta; + $revrate = $this->revCount / $delta; + } else { + $rate = '-'; + $revrate = '-'; + } + $this->progress( "$this->pageCount ($rate pages/sec $revrate revs/sec)" ); + } + } + + function progress( $string ) { + fwrite( $this->stderr, $string . 
"\n" ); + } + + function importFromFile( $filename ) { + if( preg_match( '/\.gz$/', $filename ) ) { + $filename = 'compress.zlib://' . $filename; + } + $file = fopen( $filename, 'rt' ); + return $this->importFromHandle( $file ); + } + + function importFromStdin() { + $file = fopen( 'php://stdin', 'rt' ); + return $this->importFromHandle( $file ); + } + + function importFromHandle( $handle ) { + $this->startTime = wfTime(); + + $source = new ImportStreamSource( $handle ); + $importer = new WikiImporter( $source ); + + $importer->setPageCallback( array( &$this, 'reportPage' ) ); + $this->importCallback = $importer->setRevisionCallback( + array( &$this, 'handleRevision' ) ); + + return $importer->doImport(); + } +} + +if( wfReadOnly() ) { + wfDie( "Wiki is in read-only mode; you'll need to disable it for import to work.\n" ); +} + +$reader = new BackupReader(); +if( isset( $options['quiet'] ) ) { + $reader->reporting = false; +} +if( isset( $options['report'] ) ) { + $reader->reportingInterval = intval( $options['report'] ); +} +if( isset( $options['dry-run'] ) ) { + $reader->dryRun = true; +} + +if( isset( $args[0] ) ) { + $result = $reader->importFromFile( $args[0] ); +} else { + $result = $reader->importFromStdin(); +} + +if( WikiError::isError( $result ) ) { + echo $result->getMessage() . 
"\n"; +} else { + echo "Done!\n"; +} + +?> diff --git a/maintenance/importImages.inc.php b/maintenance/importImages.inc.php new file mode 100644 index 00000000..bf48c0c7 --- /dev/null +++ b/maintenance/importImages.inc.php @@ -0,0 +1,67 @@ +<?php + +/** + * Support functions for the importImages script + * + * @package MediaWiki + * @subpackage Maintenance + * @author Rob Church <robchur@gmail.com> + */ + +/** + * Search a directory for files with one of a set of extensions + * + * @param $dir Path to directory to search + * @param $exts Array of extensions to search for + * @return mixed Array of filenames on success, or false on failure + */ +function findFiles( $dir, $exts ) { + if( is_dir( $dir ) ) { + if( $dhl = opendir( $dir ) ) { + while( ( $file = readdir( $dhl ) ) !== false ) { + if( is_file( $dir . '/' . $file ) ) { + list( $name, $ext ) = splitFilename( $dir . '/' . $file ); + if( array_search( strtolower( $ext ), $exts ) !== false ) + $files[] = $dir . '/' . $file; + } + } + return $files; + } else { + return false; + } + } else { + return false; + } +} + +/** + * Split a filename into filename and extension + * + * @param $filename Filename + * @return array + */ +function splitFilename( $filename ) { + $parts = explode( '.', $filename ); + $ext = $parts[ count( $parts ) - 1 ]; + unset( $parts[ count( $parts ) - 1 ] ); + $fname = implode( '.', $parts ); + return array( $fname, $ext ); +} + +/** + * Given an image hash, check that the structure exists to save the image file + * and create it if it doesn't + * + * @param $hash Part of an image hash, e.g. /f/fd/ + */ +function makeHashPath( $hash ) { + global $wgUploadDirectory; + $parts = explode( '/', substr( $hash, 1, strlen( $hash ) - 2 ) ); + if( !is_dir( $wgUploadDirectory . '/' . $parts[0] ) ) + mkdir( $wgUploadDirectory . '/' . $parts[0] ); + if( !is_dir( $wgUploadDirectory . '/' . $hash ) ) + mkdir( $wgUploadDirectory . '/' . $hash ); +} + + +?>
\ No newline at end of file diff --git a/maintenance/importImages.php b/maintenance/importImages.php new file mode 100644 index 00000000..925c64b7 --- /dev/null +++ b/maintenance/importImages.php @@ -0,0 +1,101 @@ +<?php + +/** + * Maintenance script to import one or more images from the local file system into + * the wiki without using the web-based interface + * + * @package MediaWiki + * @subpackage Maintenance + * @author Rob Church <robchur@gmail.com> + */ + +require_once( 'commandLine.inc' ); +require_once( 'importImages.inc.php' ); +echo( "Import Images\n\n" ); + +# Need a directory and at least one extension +if( count( $args ) > 1 ) { + + $dir = array_shift( $args ); + + # Check the allowed extensions + while( $ext = array_shift( $args ) ) + $exts[] = ltrim( $ext, '.' ); + + # Search the directory given and pull out suitable candidates + $files = findFiles( $dir, $exts ); + + # Set up a fake user for this operation + $wgUser = User::newFromName( 'Image import script' ); + $wgUser->setLoaded( true ); + + # Batch "upload" operation + foreach( $files as $file ) { + + $base = basename( $file ); + + # Validate a title + $title = Title::makeTitleSafe( NS_IMAGE, $base ); + if( is_object( $title ) ) { + + # Check existence + $image = new Image( $title ); + if( !$image->exists() ) { + + global $wgUploadDirectory; + + # copy() doesn't create paths so if the hash path doesn't exist, we + # have to create it + makeHashPath( wfGetHashPath( $image->name ) ); + + # Stash the file + echo( "Saving {$base}..." ); + + if( copy( $file, $image->getFullPath() ) ) { + + echo( "importing..." ); + + # Grab the metadata + $image->loadFromFile(); + + # Record the upload + if( $image->recordUpload( '', 'Importing image file' ) ) { + + # We're done! 
+ echo( "done.\n" ); + + } else { + echo( "failed.\n" ); + } + + } else { + echo( "failed.\n" ); + } + + } else { + echo( "{$base} could not be imported; a file with this name exists in the wiki\n" ); + } + + } else { + echo( "{$base} could not be imported; a valid title cannot be produced\n" ); + } + + } + + +} else { + showUsage(); +} + +exit(); + +function showUsage( $reason = false ) { + if( $reason ) + echo( $reason . "\n" ); + echo( "USAGE: php importImages.php <dir> <ext1> <ext2>\n\n" ); + echo( "<dir> : Path to the directory containing images to be imported\n" ); + echo( "<ext1+> File extensions to import\n\n" ); + exit(); +} + +?>
\ No newline at end of file diff --git a/maintenance/importLogs.inc b/maintenance/importLogs.inc new file mode 100644 index 00000000..154657c8 --- /dev/null +++ b/maintenance/importLogs.inc @@ -0,0 +1,144 @@ +<?php +# Copyright (C) 2004 Brion Vibber <brion@pobox.com> +# http://www.mediawiki.org/ +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +# http://www.gnu.org/copyleft/gpl.html + +/** + * Attempt to import existing log pages into the log tables. + * + * Not yet complete. 
+ * + * @todo document + * @package MediaWiki + * @subpackage Maintenance + */ + +/** */ +require_once( 'GlobalFunctions.php' ); +require_once( 'Database.php' ); +require_once( 'Article.php' ); +require_once( 'LogPage.php' ); + +/** + * Log importer + * @todo document + * @package MediaWiki + * @subpackage Maintenance + */ +class LogImporter { + var $dummy = false; + + function LogImporter( $type ) { + $this->type = $type; + $this->db =& wfGetDB( DB_MASTER ); + $this->actions = $this->setupActions(); + } + + function setupActions() { + $actions = array(); + foreach( LogPage::validActions( $this->type ) as $action ) { + $key = "{$this->type}/$action"; + $actions[$key] = $this->makeLineRegexp( $this->type, $action ); + } + return $actions; + } + + function makeLineRegexp( $type, $action ) { + $linkRegexp = '(?:\[\[)?([^|\]]+?)(?:\|[^\]]+?)?(?:\]\])?'; + $linkRegexp2 = '\[\[([^|\]]+?)(?:\|[^\]]+?)?\]\]'; + + $text = LogPage::actionText( $type, $action ); + $text = preg_quote( $text, '/' ); + $text = str_replace( '\$1', $linkRegexp, $text ); + $text = '^(.*?) ' . $linkRegexp2 . ' ' . $text; + $text .= '(?: <em>\((.*)\)<\/em>)?'; + $text = "/$text/"; + return $text; + } + + function importText( $text ) { + if( $this->dummy ) { + print $text; + var_dump( $this->actions ); + } + $lines = explode( '<li>', $text ); + foreach( $lines as $line ) { + if( preg_match( '!^(.*)</li>!', $line, $matches ) ) { + $this->importLine( $matches[1] ); + } + } + } + + function fixDate( $date ) { + # Yuck! Parsing multilingual date formats??!!!!???!!??! + # 01:55, 23 Aug 2004 - won't take in strtotimr + # "Aug 23 2004 01:55" - seems ok + # TODO: multilingual attempt to extract from the data in Language + if( preg_match( '/^(\d+:\d+(?::\d+)?), (.*)$/', $date, $matches ) ) { + $date = $matches[2] . ' ' . $matches[1]; + } + $n = strtotime( $date ) + date("Z"); + # print gmdate( 'D, d M Y H:i:s T', $n ) . 
"\n"; + $timestamp = wfTimestamp( TS_MW, $n ); + return $timestamp; + } + + function importLine( $line ) { + foreach( $this->actions as $action => $regexp ) { + if( preg_match( $regexp, $line, $matches ) ) { + if( $this->dummy ) { + #var_dump( $matches ); + } + $date = $this->fixDate( $matches[1] ); + $user = Title::newFromText( $matches[2] ); + $target = Title::newFromText( $matches[3] ); + if( isset( $matches[4] ) ) { + $comment = $matches[4]; + } else { + $comment = ''; + } + + $insert = array( + 'log_type' => $this->type, + 'log_action' => preg_replace( '!^.*/!', '', $action ), + 'log_timestamp' => $date, + 'log_user' => intval( User::idFromName( $user->getText() ) ), + 'log_namespace' => $target->getNamespace(), + 'log_title' => $target->getDBkey(), + 'log_comment' => wfUnescapeWikiText( $comment ), + ); + if( $this->dummy ) { + var_dump( $insert ); + } else { + # FIXME: avoid duplicates! + $this->db->insert( 'logging', $insert ); + } + break; + } + } + } +} + +function wfUnescapeWikiText( $text ) { + $text = str_replace( + array( '[', '|', ''', 'ISBN ', '://' , "\n=", '{{' ), + array( '[', '|', "'", 'ISBN ' , '://' , "\n=", '{{' ), + $text ); + return $text; +} + +?> diff --git a/maintenance/importLogs.php b/maintenance/importLogs.php new file mode 100644 index 00000000..6187c2e6 --- /dev/null +++ b/maintenance/importLogs.php @@ -0,0 +1,27 @@ +<?php +/** + * @todo document + * @package MediaWiki + * @subpackage Maintenance + */ + +/** */ +require_once( "commandLine.inc" ); +require_once( "importLogs.inc" ); + +#print $text; +#exit(); + +foreach( LogPage::validTypes() as $type ) { + if( $type == '' ) continue; + + $page = LogPage::logName( $type ); + $log = new Article( Title::makeTitleSafe( NS_PROJECT, $page ) ); + $text = $log->fetchContent(); + + $importer = new LogImporter( $type ); + $importer->dummy = true; + $importer->importText( $text ); +} + +?> diff --git a/maintenance/importPhase2.php b/maintenance/importPhase2.php new file mode 100644 index 
00000000..a73657b5 --- /dev/null +++ b/maintenance/importPhase2.php @@ -0,0 +1,370 @@ +<?php +# MediaWiki 'phase 2' to current format import script +# (import format current as of 1.2.0, March 2004) +# +# Copyright (C) 2004 Brion Vibber <brion@pobox.com> +# Portions by Lee Daniel Crocker, 2002 +# http://www.mediawiki.org/ +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +# http://www.gnu.org/copyleft/gpl.html + +/** + * @todo document + * @deprecated + * @package MediaWiki + * @subpackage Maintenance + */ + +/** */ +die( "This import script is not currently maintained. +If you need it you'll have to modify it as necessary.\n"); + +if ( ! is_readable( "../LocalSettings.php" ) ) { + print "A copy of your installation's LocalSettings.php\n" . + "must exist in the source directory.\n"; + exit(); +} + +$wgCommandLineMode = true; +ini_set("implicit_flush", 1); + +$DP = "../includes"; +require_once( "../LocalSettings.php" ); +require_once( "../AdminSettings.php" ); + +$wgDBuser = $wgDBadminuser; +$wgDBpassword = $wgDBadminpassword; + +$sep = ( DIRECTORY_SEPARATOR == "\\" ) ? 
";" : ":"; +ini_set( "include_path", "$IP$sep$include_path" ); + +require_once( "Setup.php" ); + +require_once( "../install-utils.inc" ); +require_once( "InitialiseMessages.inc" ); +require_once( "rebuildlinks.inc" ); +require_once( "rebuildrecentchanges.inc" ); +require_once( "rebuildtextindex.inc" ); + +/** + * @todo document + * @package MediaWiki + * @subpackage Maintenance + */ +class Phase2Importer { + var $olddb, $titleCache; + + function Phase2Importer( $database ) { + $this->olddb = $database; + $this->titleCache = new TitleCache; + } + + function importAll() { + $this->importCurData(); + $this->fixCurTitles(); + + $this->importOldData(); + $this->fixOldTitles(); + + $this->importUserData(); + $this->fixUserOptions(); + + $this->importWatchlists(); + + $this->importLinkData(); + + /* + # For some reason this is broken. RecentChanges will just start anew... + rebuildRecentChangesTablePass1(); + rebuildRecentChangesTablePass2(); + */ + + print "Rebuilding search index:\n"; + dropTextIndex(); + rebuildTextIndex(); + createTextIndex(); + + initialiseMessages(); + } + + # Simple import functions; for the most part these are pretty straightforward. + # MySQL copies everything over to the new database and tweaks a few things. 
+ function importCurData() { + print "Clearing pages from default install, if any...\n"; + wfQuery( "DELETE FROM cur", DB_MASTER ); + + print "Importing current revision data...\n"; + # Every row is copied with namespace 0; the select list must not end + # with a comma before FROM or the statement is rejected + wfQuery( "INSERT INTO cur (cur_id,cur_namespace,cur_title,cur_text,cur_comment, + cur_user,cur_user_text,cur_timestamp,cur_restrictions,cur_counter, + cur_is_redirect,cur_minor_edit,cur_is_new,cur_random,cur_touched) + SELECT cur_id,0,cur_title,cur_text,cur_comment, + cur_user,cur_user_text,cur_timestamp,REPLACE(cur_restrictions,'is_',''),cur_counter, + cur_text like '#redirect%',cur_minor_edit,0,RAND(),NOW()+0 + FROM {$this->olddb}.cur", DB_MASTER ); + $n = mysql_affected_rows(); + print "$n rows imported.\n"; + } + + function importOldData() { + print "Clearing old revision data from default install, if any...\n"; + wfQuery( "DELETE FROM old", DB_MASTER ); + + print "Importing old revision data...\n"; + wfQuery( "INSERT INTO old (old_id,old_namespace,old_title,old_text,old_comment, + old_user,old_user_text,old_timestamp,old_minor_edit,old_flags) + SELECT old_id,0,old_title,old_text,old_comment, + old_user,old_user_text,old_timestamp,old_minor_edit,'' + FROM {$this->olddb}.old", DB_MASTER ); + $n = mysql_affected_rows(); + print "$n rows imported.\n"; + } + + function importUserData() { + print "Clearing users from default install, if any...\n"; + wfQuery( "DELETE FROM user", DB_MASTER ); + + print "Importing user data...\n"; + # Insert into the current database's user table, the same unqualified + # table the DELETE above targets (there is no \$newdb variable in scope) + wfQuery( "INSERT INTO user (user_id,user_name,user_rights, + user_password,user_newpassword,user_email,user_options,user_touched) + SELECT user_id,user_name,REPLACE(user_rights,'is_',''), + MD5(CONCAT(user_id,'-',MD5(user_password))),'',user_email,user_options,NOW()+0 + FROM {$this->olddb}.user", DB_MASTER ); + $n = mysql_affected_rows(); + print "$n rows imported.\n"; + } + + # A little less clean...
+ function importWatchlists() { + print "Clearing watchlists from default install, if any...\n"; + wfQuery( "DELETE FROM watchlist", DB_MASTER ); + + print "Importing watchlists..."; + $res = wfQuery( "SELECT user_id,user_watch FROM {$this->olddb}.user WHERE user_watch != ''", DB_MASTER ); + $total = wfNumRows( $res ); + $n = 0; + print " ($total total)\n"; + + while( $row = wfFetchObject( $res ) ) { + $id = intval( $row->user_id ); + # user_watch holds one page name per line + $list = explode( "\n", $row->user_watch ); + foreach( $list as $page ) { + $title = $this->titleCache->fetch( $page ); + if( is_null( $title ) ) { + # \$row only carries user_id and user_watch; report the + # watchlist entry that could not be turned into a title + print "Caught bad title '$page'\n"; + } else { + $ns = $title->getNamespace(); + $t = wfStrencode( $title->getDBkey() ); + wfQuery( "INSERT INTO watchlist(wl_user,wl_namespace,wl_title) VALUES ($id,$ns,'$t')", DB_MASTER ); + } + } + if( ++$n % 50 == 0 ) { + print "$n\n"; + } + } + wfFreeResult( $res ); + } + + function importLinkData() { + # MUST BE CALLED BEFORE! fixCurTitles() + print "Clearing links from default install, if any...\n"; + wfQuery( "DELETE FROM links", DB_MASTER ); + wfQuery( "DELETE FROM brokenlinks", DB_MASTER ); + + print "Importing live links..."; + wfQuery( "INSERT INTO links (l_from, l_to) + SELECT DISTINCT linked_from,cur_id + FROM {$this->olddb}.linked,{$this->olddb}.cur + WHERE linked_to=cur_title", DB_MASTER ); + $n = mysql_affected_rows(); + print "$n rows imported.\n"; + + print "Importing broken links..."; + wfQuery( "INSERT INTO brokenlinks (bl_from, bl_to) + SELECT DISTINCT cur_id,unlinked_to + FROM {$this->olddb}.unlinked,{$this->olddb}.cur + WHERE unlinked_from=cur_title", DB_MASTER ); + $n = mysql_affected_rows(); + print "$n rows imported.\n"; + } + + # Fixup functions: munge data that's already been brought into tables + function fixCurTitles() { + $this->fixTitles( "cur" ); + } + + function fixOldTitles() { + $this->fixTitles( "old" ); + } + + function fixTitles( $table ) { + print "Fixing titles in $table..."; + $res = wfQuery( "SELECT
DISTINCT {$table}_title AS title FROM $table", DB_MASTER ); + $total = wfNumRows( $res ); + $n = 0; + print " ($total total)\n"; + + while( $row = wfFetchObject( $res ) ) { + $xt = wfStrencode( $row->title ); + $title = $this->titleCache->fetch( $row->title ); + if( is_null( $title ) ) { + print "Caught bad title '{$row->title}'\n"; + } else { + $ns = $title->getNamespace(); + $t = wfStrencode( $title->getDBkey() ); + wfQuery( "UPDATE $table SET {$table}_namespace=$ns,{$table}_title='$t' + WHERE {$table}_namespace=0 AND {$table}_title='$xt'", DB_MASTER ); + } + if( ++$n % 50 == 0 ) { + print "$n\n"; + } + } + wfFreeResult( $res ); + } + + function rewriteUserOptions( $in ) + { + $s = urldecode( $in ); + $a = explode( "\n", $s ); + + foreach ( $a as $l ) { + if ( preg_match( "/^([A-Za-z0-9_]+)=(.*)/", $l, $m ) ) { + $ops[$m[1]] = $m[2]; + } + } + $nops = array(); + + $q = strtolower( $ops["quickBar"] ); + if ( $q == "none" ) { $q = 0; } + else { $q = 1; } # Default to left + $nops["quickbar"] = $q; + + if ( $ops["markupNewTopics"] == "inverse" ) { + $nops["highlightbroken"] = 1; + } + $sk = substr( strtolower( $ops["skin"] ), 0, 4 ); + if ( "star" == $sk ) { $sk = 0; } + else if ( "nost" == $sk ) { $sk = 1; } + else if ( "colo" == $sk ) { $sk = 2; } + else { $sk = 0; } + $nops["skin"] = $sk; + + $u = strtolower( $ops["underlineLinks"] ); + if ( "yes" == $u || "on" == $u ) { $nops["underline"] = 1; } + else { $nops["underline"] = 0; } + + $t = ( (int) ($ops["hourDiff"]) ); + if ( $t < -23 || $t > 23 ) { $t = 0; } + if ( 0 != $t ) { $nops["timecorrection"] = $t; } + + $j = strtolower( $ops["justify"] ); + if ( "yes" == $j || "on" == $j ) { $nops["justify"] = 1; } + $n = strtolower( $ops["numberHeadings"] ); + if ( "yes" == $n || "on" == $n ) { $nops["numberheadings"] = 1; } + $h = strtolower( $ops["hideMinor"] ); + if ( "yes" == $h || "on" == $h ) { $nops["hideminor"] = 1; } + $r = strtolower( $ops["rememberPassword"] ); + if ( "yes" == $r || "on" == $r ) { 
$nops["rememberpassword"] = 1; } + $s = strtolower( $ops["showHover"] ); + if ( "yes" == $s || "on" == $s ) { $nops["hover"] = 1; } + + $c = $ops["cols"]; + if ( $c < 20 || $c > 200 ) { $nops["cols"] = 80; } + else { $nops["cols"] = $c; } + $r = $ops["rows"]; + if ( $r < 5 || $r > 100 ) { $nops["rows"] = 20; } + else { $nops["rows"] = $r; } + $r = $ops["resultsPerPage"]; + if ( $r < 3 || $r > 500 ) { $nops["searchlimit"] = 20; } + else { $nops["searchlimit"] = $r; } + $r = $ops["viewRecentChanges"]; + if ( $r < 10 || $r > 1000 ) { $nops["rclimit"] = 50; } + else { $nops["rclimit"] = $r; } + $nops["rcdays"] = 3; + + $a = array(); + foreach ( $nops as $oname => $oval ) { + array_push( $a, "$oname=$oval" ); + } + $s = implode( "\n", $a ); + return $s; + } + + function fixUserOptions() { + print "Fixing user options..."; + $res = wfQuery( "SELECT user_id,user_options FROM user", DB_MASTER ); + $total = wfNumRows( $res ); + $n = 0; + print " ($total total)\n"; + + while( $row = wfFetchObject( $res ) ) { + $id = intval( $row->user_id ); + $option = wfStrencode( $this->rewriteUserOptions( $row->user_options ) ); + wfQuery( "UPDATE user SET user_options='$option' WHERE user_id=$id LIMIT 1", DB_MASTER ); + if( ++$n % 50 == 0 ) { + print "$n\n"; + } + } + wfFreeResult( $res ); + } + +} + +/** + * @todo document + * @package MediaWiki + * @subpackage Maintenance + */ +class TitleCache { + var $hash = array(); + + function &fetch( $dbkey ) { + if( !isset( $hash[$dbkey] ) ) { + $hash[$dbkey] = Title::newFromDBkey( $dbkey ); + } + return $hash[$dbkey]; + } + +} + +# +print "You should have already run the installer to create a fresh, blank database.\n"; +print "Data will be inserted into '$wgDBname'. 
THIS SHOULD BE EMPTY AND ANY DATA IN IN WILL BE ERASED!\n"; +print "\nIf that's not what you want, ABORT NOW!\n\n"; + +print "Please enter the name of the old 'phase 2'-format database that will be used as a source:\n"; +print "Old database name [enciclopedia]: "; +$olddb = readconsole(); +if( empty( $olddb ) ) $olddb = "enciclopedia"; + +if( $olddb == $wgDBname ) { + die( "Can't upgrade in-place! You must create a new database and copy data into it.\n" ); +} + +print "\nSource database: '$olddb'\n"; +print " Dest database: '$wgDBname'\n"; +print "Is this correct? Anything in '$wgDBname' WILL BE DESTROYED. [y/N] "; +$response = readconsole(); +if( strtolower( $response{0} ) != 'y' ) { + die( "\nAborted by user.\n" ); +} + +print "Starting import....\n"; + +$wgTitle = Title::newFromText( "Conversion script" ); +$importer = new Phase2Importer( $olddb ); +$importer->importAll(); + +?> diff --git a/maintenance/importTextFile.inc b/maintenance/importTextFile.inc new file mode 100644 index 00000000..50b936c1 --- /dev/null +++ b/maintenance/importTextFile.inc @@ -0,0 +1,75 @@ +<?php + +/** + * Support functions for the importTextFile script + * + * @package MediaWiki + * @subpackage Maintenance + * @author Rob Church <robchur@gmail.com> + */ + +require_once( "$IP/includes/RecentChange.php" ); + +/** + * Insert a new article + * + * @param $title Title of the article + * @param $text Text of the article + * @param $user User associated with the edit + * @param $comment Edit summary + * @param $rc Whether or not to add a recent changes event + * @return bool + */ +function insertNewArticle( &$title, $text, &$user, $comment, $rc ) { + if( !$title->exists() ) { + # Create the article + $dbw =& wfGetDB( DB_MASTER ); + $dbw->immediateBegin(); + $article = new Article( $title ); + $articleId = $article->insertOn( $dbw ); + # Prepare and save associated revision + $revision = new Revision( array( 'page' => $articleId, 'text' => $text, 'user' => $user->mId, 'user_text' => 
$user->getName(), 'comment' => $comment ) ); + $revisionId = $revision->insertOn( $dbw ); + # Make it the current revision + $article->updateRevisionOn( $dbw, $revision ); + $dbw->immediateCommit(); + # Update recent changes if appropriate + if( $rc ) + updateRecentChanges( $dbw, $title, $user, $comment, strlen( $text ), $articleId ); + # Touch links etc. + Article::onArticleCreate( $title ); + $article->editUpdates( $text, $comment, false, $dbw->timestamp(), $revisionId ); + return true; + } else { + # Title exists; touch nothing + return false; + } +} + +/** + * Turn a filename into a title + * + * @param $filename Filename to be transformed + * @return Title + */ +function titleFromFilename( $filename ) { + $parts = explode( '/', $filename ); + $parts = explode( '.', $parts[ count( $parts ) - 1 ] ); + return Title::newFromText( $parts[0] ); +} + +/** + * Update recent changes with the page creation event + * + * @param $dbw Database in use + * @param $title Title of the new page + * @param $user User responsible for the creation + * @param $comment Edit summary associated with the edit + * @param $size Size of the page + * @param $articleId Article identifier + */ +function updateRecentChanges( &$dbw, &$title, &$user, $comment, $size, $articleId ) { + RecentChange::notifyNew( $dbw->timestamp(), $title, false, $user, $comment, 'default', '', $size, $articleId ); +} + +?>
\ No newline at end of file
diff --git a/maintenance/importTextFile.php b/maintenance/importTextFile.php
new file mode 100644
index 00000000..625763be
--- /dev/null
+++ b/maintenance/importTextFile.php
@@ -0,0 +1,111 @@
<?php

/**
 * Maintenance script to insert an article, importing text from a file
 *
 * Steps: locate the file, work out the title (--title or the filename),
 * resolve the user (--user or a default), pick the edit summary
 * (--comment or a default) and insert the page unless it already exists.
 *
 * @package MediaWiki
 * @subpackage Maintenance
 * @author Rob Church <robchur@gmail.com>
 */

$options = array( 'help', 'norc' );
$optionsWithArgs = array( 'title', 'user', 'comment' );
require_once( 'commandLine.inc' );
require_once( 'importTextFile.inc' );
echo( "Import Text File\n\n" );

if( !isset( $options['help'] ) || !$options['help'] ) {

	# Check file existence; a missing argument is reported as "not found"
	$filename = isset( $args[0] ) ? $args[0] : '';
	echo( "Using file '{$filename}'..." );
	if( file_exists( $filename ) ) {
		echo( "found.\n" );

		# Work out the title for the page.
		# This used to test the undefined $option with ||, so the supplied-title
		# branch ran even when no --title was given and the filename fallback
		# never did.
		if( isset( $options['title'] ) && trim( $options['title'] ) != '' ) {
			$titleText = $options['title'];
			# Use the supplied title
			echo( "Using title '{$titleText}'..." );
			$title = Title::newFromText( $options['title'] );
		} else {
			# Attempt to make a title out of the filename
			echo( "Using title from filename..." );
			$title = titleFromFilename( $filename );
		}

		# Check the title's valid
		if( !is_null( $title ) && is_object( $title ) ) {
			echo( "ok.\n" );

			# Read in the text
			$text = file_get_contents( $filename );

			# Check the supplied user and fall back to a default if needed
			if( isset( $options['user'] ) && trim( $options['user'] ) != '' ) {
				$username = $options['user'];
			} else {
				$username = 'MediaWiki default';
			}
			echo( "Using user '{$username}'..." );
			$user = User::newFromName( $username );

			# Check the user's valid
			if( !is_null( $user ) && is_object( $user ) ) {
				echo( "ok.\n" );
				$wgUser =& $user;

				# If a comment was supplied, use it (replace _ with spaces ) else use a default.
				# Same repair as for the title: && instead of ||, and the
				# comparison moved outside trim().
				if( isset( $options['comment'] ) && trim( $options['comment'] ) != '' ) {
					$comment = str_replace( '_', ' ', $options['comment'] );
				} else {
					$comment = 'Importing text file';
				}
				echo( "Using edit summary '{$comment}'.\n" );

				# Do we need to update recent changes?
				$rc = !( isset( $options['norc'] ) && $options['norc'] );

				# Attempt the insertion
				echo( "Attempting to insert page..." );
				$success = insertNewArticle( $title, $text, $user, $comment, $rc );
				if( $success ) {
					echo( "done.\n" );
				} else {
					echo( "failed. Title exists.\n" );
				}

			} else {
				# Dud user
				echo( "invalid username.\n" );
			}

		} else {
			# Dud title
			echo( "invalid title.\n" );
		}

	} else {
		# File not found
		echo( "not found.\n" );
	}

} else {
	# Show help
	echo( "Imports the contents of a text file into a wiki page.\n\n" );
	echo( "USAGE: php importTextFile.php [--help|--title <title>|--user <user>|--comment <comment>|--norc] <filename>\n\n" );
	echo( " --help: Show this help information\n" );
	echo( " --title <title> : Title for the new page; if not supplied, the filename is used as a base for the title\n" );
	echo( " --user <user> : User to be associated with the edit; if not supplied, a default is used\n" );
	echo( "--comment <comment> : Edit summary to be associated with the edit; underscores are transformed into spaces; if not supplied, a default is used\n" );
	echo( " <filename> : Path to the file containing the wikitext to import\n" );
	echo( " --norc : Do not add a page creation event to recent changes\n" );

}
echo( "\n" );

?>
\ No newline at end of file
diff --git a/maintenance/importUseModWiki.php b/maintenance/importUseModWiki.php
new file mode 100644
index 00000000..15f5e444
--- /dev/null
+++ b/maintenance/importUseModWiki.php
@@ -0,0 +1,365 @@
<?php

/**
 * Import data from a UseModWiki into a MediaWiki wiki
 * 2003-02-09 Brion VIBBER <brion@pobox.com>
 * Based loosely on Magnus's code from 2001-2002
 *
 * Updated limited version to get something working temporarily
 * 2003-10-09
 * Be sure to run the link & index rebuilding scripts!
 *
 * Some more munging for charsets etc
 * 2003-11-28
 *
 * Partial fix for pages starting with lowercase letters (??)
 * and CamelCase and /Subpage link conversion
 * 2004-11-17
 *
 * Rewrite output to create Special:Export format for import
 * instead of raw SQL. Should be 'future-proof' against future
 * schema changes.
 * 2005-03-14
 *
 * @todo document
 * @package MediaWiki
 * @subpackage Maintenance
 */

if( php_sapi_name() != 'cli' ) {
	echo "Please customize the settings and run me from the command line.";
	die( -1 );
}

/** Set these correctly! */
$wgImportEncoding = "CP1252"; /* We convert all to UTF-8 */
$wgRootDirectory = "/kalman/Projects/wiki2002/wiki/lib-http/db/wiki";

/* On a large wiki, you might run out of memory */
@ini_set( 'memory_limit', '40M' );

/* globals */
$wgFieldSeparator = "\xb3"; # Some wikis may use different char
	$FS = $wgFieldSeparator ;
	$FS1 = $FS."1" ;
	$FS2 = $FS."2" ;
	$FS3 = $FS."3" ;

# Unicode sanitization tools
require_once( '../includes/normal/UtfNormal.php' );

# Maps user name => user id; stays empty unless accounts are imported
$usercache = array();

importPages();

# ------------------------------------------------------------------------------

/**
 * Write the whole export document to stdout: XML prologue, every page
 * found under page/A .. page/Z and page/other, then the closing tag.
 */
function importPages()
{
	global $wgRootDirectory;

	# Kept in a variable so that "?" followed by ">" never appears literally
	$gt = '>';
	echo <<<END
<?xml version="1.0" encoding="UTF-8" ?$gt
<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.1/"
           xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
           xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.1/
                               http://www.mediawiki.org/xml/export-0.1.xsd"
           version="0.1"
           xml:lang="en">
<!-- generated by importUseModWiki.php -->

END;
	$letters = array(
		'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I',
		'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R',
		'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'other' );
	foreach( $letters as $letter ) {
		$dir = "$wgRootDirectory/page/$letter";
		if( is_dir( $dir ) )
			importPageDirectory( $dir );
	}
	echo <<<END
</mediawiki>

END;
}

/**
 * Emit every *.db page in a directory, descending into subdirectories.
 *
 * @param string $dir Directory to scan
 * @param string $prefix Prepended to each page name found in $dir
 */
function importPageDirectory( $dir, $prefix = "" )
{
	echo "\n<!-- Checking page directory " . xmlCommentSafe( $dir ) . " -->\n";
	$mydir = opendir( $dir );
	if( !$mydir ) {
		echo "<!-- Could not open directory " . xmlCommentSafe( $dir ) . ". Skipping. -->\n";
		return;
	}
	# Compare against false explicitly: an entry named "0" is falsy and
	# would otherwise end the scan early.
	while( false !== ( $entry = readdir( $mydir ) ) ) {
		if( preg_match( '/^(.+)\.db$/', $entry, $m ) ) {
			echo importPage( $prefix . $m[1] );
		} else {
			if( is_dir( "$dir/$entry" ) ) {
				if( $entry != '.' && $entry != '..' ) {
					importPageDirectory( "$dir/$entry", "$entry/" );
				}
			} else {
				echo "<!-- File '" . xmlCommentSafe( $entry ) . "' doesn't seem to contain an article. Skipping. -->\n";
			}
		}
	}
	closedir( $mydir );
}


# ------------------------------------------------------------------------------

/* fetch_ functions
	Grab a given item from the database
	*/

/**
 * Path of a page relative to the page/ or keep/ directory: the uppercased
 * first letter, or "other" when the title does not start with a letter.
 */
function useModFilename( $title ) {
	$c = substr( $title, 0, 1 );
	if(preg_match( '/[A-Z]/i', $c ) ) {
		return strtoupper( $c ) . "/$title";
	}
	return "other/$title";
}

/**
 * Read the current revision of a page from its .db file.
 * Terminates the script when the file does not exist.
 *
 * @param string $title
 * @return object With text, summary, minor, ts, username and host
 */
function fetchPage( $title )
{
	global $FS1, $FS2, $FS3, $wgRootDirectory;

	$fname = $wgRootDirectory . "/page/" . useModFilename( $title ) . ".db";
	if( !file_exists( $fname ) ) {
		echo "Couldn't open file '$fname' for page '$title'.\n";
		die( -1 );
	}

	# Three nested levels of key/value records, one separator each
	$page = splitHash( $FS1, file_get_contents( $fname ) );
	$section = splitHash( $FS2, $page["text_default"] );
	$text = splitHash( $FS3, $section["data"] );

	return array2object( array( "text" => $text["text"] , "summary" => $text["summary"] ,
		"minor" => $text["minor"] , "ts" => $section["ts"] ,
		"username" => $section["username"] , "host" => $section["host"] ) );
}

/**
 * Read the kept (old) revisions of a page from its .kp file.
 *
 * @param string $title
 * @return array Revision objects shaped like fetchPage()'s result; empty
 *               when there is no .kp file
 */
function fetchKeptPages( $title )
{
	global $FS1, $FS2, $FS3, $wgRootDirectory;

	$fname = $wgRootDirectory . "/keep/" . useModFilename( $title ) . ".kp";
	if( !file_exists( $fname ) ) return array();

	$keptlist = explode( $FS1, file_get_contents( $fname ) );
	array_shift( $keptlist ); # Drop the junk at beginning of file

	$revisions = array();
	foreach( $keptlist as $rev ) {
		$section = splitHash( $FS2, $rev );
		$text = splitHash( $FS3, $section["data"] );
		if ( $text["text"] && $text["minor"] != "" && ( $section["ts"]*1 > 0 ) ) {
			array_push( $revisions, array2object( array ( "text" => $text["text"] , "summary" => $text["summary"] ,
				"minor" => $text["minor"] , "ts" => $section["ts"] ,
				"username" => $section["username"] , "host" => $section["host"] ) ) );
		} else {
			echo "<!-- skipped a bad old revision -->\n";
		}
	}
	return $revisions;
}

/**
 * Turn "key SEP value SEP key SEP value ..." into an associative array.
 * A trailing key without a value is ignored.
 */
function splitHash ( $sep , $str ) {
	$temp = explode ( $sep , $str ) ;
	$ret = array () ;
	$count = count ( $temp ) ;
	for ( $i = 0; $i + 1 < $count ; $i += 2 ) {
		$ret[$temp[$i]] = $temp[$i + 1] ;
	}
	return $ret ;
}


/* import_ functions
	Take a fetched item and produce export XML
	*/

/**
 * Resolve the contributor of a revision.
 *
 * @param string $name User name, may be empty
 * @param string $host Host recorded for the edit, used when there is no name
 * @return array ( user id, user name )
 */
function checkUserCache( $name, $host )
{
	global $usercache;

	if( $name ) {
		# The cache is keyed by name, so test the key; in_array() searched
		# the stored ids instead.
		if( isset( $usercache[$name] ) ) {
			$userid = $usercache[$name];
		} else {
			# If we haven't imported user accounts
			$userid = 0;
		}
		$username = str_replace( '_', ' ', $name );
	} else {
		$userid = 0;
		$username = $host;
	}
	return array( $userid, $username );
}

/**
 * Build the <page> element for one page, including its kept revisions.
 *
 * @param string $title Page name as found on disk
 * @return string XML fragment
 */
function importPage( $title )
{
	echo "\n<!-- Importing page " . xmlCommentSafe( $title ) . " -->\n";
	$page = fetchPage( $title );

	$newtitle = xmlsafe( str_replace( '_', ' ', recodeText( $title ) ) );

	$munged = mungeFormat( $page->text );
	if( $munged != $page->text ) {
		/**
		 * Save a *new* revision with the conversion, and put the
		 * previous last version into the history.
		 */
		$next = array2object( array(
			'text' => $munged,
			'minor' => 1,
			'username' => 'Conversion script',
			'host' => '127.0.0.1',
			'ts' => time(),
			'summary' => 'link fix',
			) );
		$revisions = array( $page, $next );
	} else {
		/**
		 * Current revision:
		 */
		$revisions = array( $page );
	}
	$xml = <<<END
  <page>
    <title>$newtitle</title>

END;

	# History
	$revisions = array_merge( $revisions, fetchKeptPages( $title ) );
	if(count( $revisions ) == 0 ) {
		# Nothing to emit; this used to return the undefined $sql left
		# over from the raw-SQL version of the script.
		return '';
	}

	foreach( $revisions as $rev ) {
		$text = xmlsafe( recodeText( $rev->text ) );
		$minor = ($rev->minor ? '<minor/>' : '');
		list( $userid, $username ) = checkUserCache( $rev->username, $rev->host );
		$username = xmlsafe( recodeText( $username ) );
		$timestamp = xmlsafe( timestamp2ISO8601( $rev->ts ) );
		$comment = xmlsafe( recodeText( $rev->summary ) );

		$xml .= <<<END
    <revision>
      <timestamp>$timestamp</timestamp>
      <contributor><username>$username</username></contributor>
      $minor
      <comment>$comment</comment>
      <text>$text</text>
    </revision>

END;
	}
	$xml .= "</page>\n\n";
	return $xml;
}

# Whee!
/**
 * Normalise line endings, convert from $wgImportEncoding to UTF-8 and
 * expand numeric character references.
 */
function recodeText( $string ) {
	global $wgImportEncoding;
	# For currently latin-1 wikis
	$string = str_replace( "\r\n", "\n", $string );
	$string = @iconv( $wgImportEncoding, "UTF-8", $string );
	$string = wfMungeToUtf8( $string ); # Any old numeric character references
	return $string;
}

/**
 * Encode a code point as UTF-8.
 *
 * @param int $codepoint
 * @return string UTF-8 bytes, or the "&#N;" reference for values above U+10FFFF
 */
function wfUtf8Sequence($codepoint) {
	if($codepoint < 0x80) return chr($codepoint);
	if($codepoint < 0x800) return chr($codepoint >> 6 & 0x3f | 0xc0) .
		chr($codepoint & 0x3f | 0x80);
	if($codepoint < 0x10000) return chr($codepoint >> 12 & 0x0f | 0xe0) .
		chr($codepoint >> 6 & 0x3f | 0x80) .
		chr($codepoint & 0x3f | 0x80);
	# Four bytes reach U+10FFFF; the old bound of 0x100000 left out the
	# last plane.
	if($codepoint < 0x110000) return chr($codepoint >> 18 & 0x07 | 0xf0) .
		chr($codepoint >> 12 & 0x3f | 0x80) .
		chr($codepoint >> 6 & 0x3f | 0x80) .
		chr($codepoint & 0x3f | 0x80);
	# Not a Unicode code point
	return "&#$codepoint;";
}

/** preg callback: "&#123;" => UTF-8; out-of-range references stay as written */
function wfUtf8DecimalEntity( $matches ) {
	$codepoint = intval( $matches[1], 10 );
	if( strlen( $matches[1] ) > 7 || $codepoint >= 0x110000 ) return $matches[0];
	return wfUtf8Sequence( $codepoint );
}

/** preg callback: "&#x7b;" => UTF-8; out-of-range references stay as written */
function wfUtf8HexEntity( $matches ) {
	$codepoint = hexdec( $matches[1] );
	if( $codepoint >= 0x110000 ) return $matches[0];
	return wfUtf8Sequence( $codepoint );
}

/**
 * Replace decimal and hexadecimal character references with UTF-8.
 */
function wfMungeToUtf8($string) {
	# Callbacks instead of the /e modifier: evaluating the digits as PHP
	# code read "&#010;" as octal and failed outright on "&#08;".
	$string = preg_replace_callback ( '/&#([0-9]+);/', 'wfUtf8DecimalEntity', $string );
	$string = preg_replace_callback ( '/&#x([0-9a-f]+);/i', 'wfUtf8HexEntity', $string );
	# Should also do named entities here
	return $string;
}

/** Format a Unix timestamp as e.g. 2003-08-05T18:30:02Z */
function timestamp2ISO8601( $ts ) {
	#2003-08-05T18:30:02Z
	return gmdate( 'Y-m-d', $ts ) . 'T' . gmdate( 'H:i:s', $ts ) . 'Z';
}

function xmlsafe( $string ) {
	/**
	 * The page may contain old data which has not been properly normalized.
	 * Invalid UTF-8 sequences or forbidden control characters will make our
	 * XML output invalid, so be sure to strip them out.
	 */
	$string = UtfNormal::cleanUp( $string );

	$string = htmlspecialchars( $string );
	return $string;
}

/** As xmlsafe(), recoded first and with "--" broken up for use inside comments */
function xmlCommentSafe( $text ) {
	return str_replace( '--', '\\-\\-', xmlsafe( recodeText( $text ) ) );
}


/** Copy an associative array onto an object's properties */
function array2object( $arr ) {
	$o = (object)0;
	foreach( $arr as $x => $y ) {
		$o->$x = $y;
	}
	return $o;
}


/**
 * Make CamelCase and /Talk links work
 *
 * <nowiki> sections, URLs and existing [[links]] are swapped for a
 * placeholder first and restored, in order, after the bare CamelCase and
 * /Subpage words have been wrapped in [[ ]].
 */
function mungeFormat( $text ) {
	global $nowiki;
	$nowiki = array();
	$staged = preg_replace_callback(
		'/(<nowiki>.*?<\\/nowiki>|(?:http|https|ftp):\\S+|\[\[[^]\\n]+]])/s',
		'nowikiPlaceholder', $text );

	# This is probably not 100% correct, I'm just
	# glancing at the UseModWiki code.
	$upper = "[A-Z]";
	$lower = "[a-z_0-9]";
	$any = "[A-Za-z_0-9]";
	$camel = "(?:$upper+$lower+$upper+$any*)";
	$subpage = "(?:\\/$any+)";
	$substart = "(?:\\/$upper$any*)";

	$munged = preg_replace( "/(?!\\[\\[)($camel$subpage*|$substart$subpage*)\\b(?!\\]\\]|>)/",
		'[[$1]]', $staged );

	# Callback instead of the /e modifier; same first-in, first-out restore
	$final = preg_replace_callback( '/' . preg_quote( placeholder() ) . '/s',
		'nowikiRestore', $munged );
	return $final;
}


/** Marker text standing in for a protected span */
function placeholder( $x = null ) {
	return '\xffplaceholder\xff';
}

/** preg callback: remember the protected span and leave the marker behind */
function nowikiPlaceholder( $matches ) {
	global $nowiki;
	$nowiki[] = $matches[1];
	return placeholder();
}

/** preg callback: put back the next protected span */
function nowikiRestore( $matches ) {
	global $nowiki;
	return array_shift( $nowiki );
}

?>
diff --git a/maintenance/initStats.php b/maintenance/initStats.php
new file mode 100644
index 00000000..b622c3f0
--- /dev/null
+++ b/maintenance/initStats.php
@@ -0,0 +1,78 @@
<?php

/**
 * Maintenance script to re-initialise or update the site statistics table
 *
 * Counts are read from the slave connection and written to site_stats
 * row 1 on the master.
 *
 * @package MediaWiki
 * @subpackage Maintenance
 * @author Brion Vibber
 * @author Rob Church <robchur@gmail.com>
 * @licence GNU General Public Licence 2.0 or later
 */

$options = array( 'help', 'update', 'noviews' );
require_once( 'commandLine.inc' );
echo( "Refresh Site Statistics\n\n" );
$dbr =& wfGetDB( DB_SLAVE );
$fname = 'initStats';

if( isset( $options['help'] ) ) {
	showHelp();
	exit();
}

echo( "Counting total edits..." );
$edits = $dbr->selectField( 'revision', 'COUNT(*)', '', $fname );
echo( "{$edits}\nCounting number of articles..." );

# Articles: non-empty, non-redirect pages in the content namespaces
global $wgContentNamespaces;
$good = $dbr->selectField( 'page', 'COUNT(*)', array( 'page_namespace' => $wgContentNamespaces, 'page_is_redirect' => 0, 'page_len > 0' ), $fname );
echo( "{$good}\nCounting total pages..." );

$pages = $dbr->selectField( 'page', 'COUNT(*)', '', $fname );
echo( "{$pages}\nCounting number of users..." );

$users = $dbr->selectField( 'user', 'COUNT(*)', '', $fname );
echo( "{$users}\nCounting number of admins..." );

$admin = $dbr->selectField( 'user_groups', 'COUNT(*)', array( 'ug_group' => 'sysop' ), $fname );
echo( "{$admin}\nCounting number of images..." );

$image = $dbr->selectField( 'image', 'COUNT(*)', '', $fname );
echo( "{$image}\n" );

if( !isset( $options['noviews'] ) ) {
	echo( "Counting total page views..." );
	$views = $dbr->selectField( 'page', 'SUM(page_counter)', '', $fname );
	echo( "{$views}\n" );
}

echo( "\nUpdating site statistics..." );

$dbw =& wfGetDB( DB_MASTER );
$values = array( 'ss_total_edits' => $edits,
				'ss_good_articles' => $good,
				'ss_total_pages' => $pages,
				'ss_users' => $users,
				'ss_admins' => $admin,
				'ss_images' => $image );
$conds = array( 'ss_row_id' => 1 );
# $views is only set when the view count ran above; it is reused here as
# the column array for the insert
$views = array( 'ss_total_views' => isset( $views ) ? $views : 0 );

if( isset( $options['update'] ) ) {
	# Leaves ss_total_views as it is
	$dbw->update( 'site_stats', $values, $conds, $fname );
} else {
	$dbw->delete( 'site_stats', $conds, $fname );
	$dbw->insert( 'site_stats', array_merge( $values, $conds, $views ), $fname );
}

echo( "done.\n\n" );

/** Print usage information */
function showHelp() {
	echo( "Re-initialise the site statistics tables.\n\n" );
	echo( "Usage: php initStats.php [--update|--noviews]\n\n" );
	echo( " --update : Update the existing statistics (preserves the ss_total_views field)\n" );
	echo( "--noviews : Don't update the page view counter\n\n" );
}

?>
\ No newline at end of file diff --git a/maintenance/interwiki.sql b/maintenance/interwiki.sql new file mode 100644 index 00000000..ca656e46 --- /dev/null +++ b/maintenance/interwiki.sql @@ -0,0 +1,179 @@ +-- Based more or less on the public interwiki map from MeatballWiki +-- Default interwiki prefixes... + +REPLACE INTO /*$wgDBprefix*/interwiki (iw_prefix,iw_url,iw_local) VALUES +('abbenormal','http://www.ourpla.net/cgi-bin/pikie.cgi?$1',0), +('acadwiki','http://xarch.tu-graz.ac.at/autocad/wiki/$1',0), +('acronym','http://www.acronymfinder.com/af-query.asp?String=exact&Acronym=$1',0), +('advogato','http://www.advogato.org/$1',0), +('aiwiki','http://www.ifi.unizh.ch/ailab/aiwiki/aiw.cgi?$1',0), +('alife','http://news.alife.org/wiki/index.php?$1',0), +('annotation','http://bayle.stanford.edu/crit/nph-med.cgi/$1',0), +('annotationwiki','http://www.seedwiki.com/page.cfm?wikiid=368&doc=$1',0), +('arxiv','http://www.arxiv.org/abs/$1',0), +('aspienetwiki','http://aspie.mela.de/Wiki/index.php?title=$1',0), +('bemi','http://bemi.free.fr/vikio/index.php?$1',0), +('benefitswiki','http://www.benefitslink.com/cgi-bin/wiki.cgi?$1',0), +('brasilwiki','http://rio.ifi.unizh.ch/brasilienwiki/index.php/$1',0), +('bridgeswiki','http://c2.com/w2/bridges/$1',0), +('c2find','http://c2.com/cgi/wiki?FindPage&value=$1',0), +('cache','http://www.google.com/search?q=cache:$1',0), +('ciscavate','http://ciscavate.org/index.php/$1',0), +('cliki','http://ww.telent.net/cliki/$1',0), +('cmwiki','http://www.ourpla.net/cgi-bin/wiki.pl?$1',0), +('codersbase','http://www.codersbase.com/$1',0), +('commons','http://commons.wikimedia.org/wiki/$1',0), +('consciousness','http://teadvus.inspiral.org/',0), +('corpknowpedia','http://corpknowpedia.org/wiki/index.php/$1',0), +('creationmatters','http://www.ourpla.net/cgi-bin/wiki.pl?$1',0), +('dejanews','http://www.deja.com/=dnc/getdoc.xp?AN=$1',0), +('demokraatia','http://wiki.demokraatia.ee/',0), 
+('dictionary','http://www.dict.org/bin/Dict?Database=*&Form=Dict1&Strategy=*&Query=$1',0), +('disinfopedia','http://www.disinfopedia.org/wiki.phtml?title=$1',0), +('diveintoosx','http://diveintoosx.org/$1',0), +('docbook','http://docbook.org/wiki/moin.cgi/$1',0), +('dolphinwiki','http://www.object-arts.com/wiki/html/Dolphin/$1',0), +('drumcorpswiki','http://www.drumcorpswiki.com/index.php/$1',0), +('dwjwiki','http://www.suberic.net/cgi-bin/dwj/wiki.cgi?$1',0), +('eĉei','http://www.ikso.net/cgi-bin/wiki.pl?$1',0), +('echei','http://www.ikso.net/cgi-bin/wiki.pl?$1',0), +('ecxei','http://www.ikso.net/cgi-bin/wiki.pl?$1',0), +('efnetceewiki','http://purl.net/wiki/c/$1',0), +('efnetcppwiki','http://purl.net/wiki/cpp/$1',0), +('efnetpythonwiki','http://purl.net/wiki/python/$1',0), +('efnetxmlwiki','http://purl.net/wiki/xml/$1',0), +('eljwiki','http://elj.sourceforge.net/phpwiki/index.php/$1',0), +('emacswiki','http://www.emacswiki.org/cgi-bin/wiki.pl?$1',0), +('elibre','http://enciclopedia.us.es/index.php/$1',0), +('eokulturcentro','http://esperanto.toulouse.free.fr/wakka.php?wiki=$1',0), +('evowiki','http://www.evowiki.org/index.php/$1',0), +('finalempire','http://final-empire.sourceforge.net/cgi-bin/wiki.pl?$1',0), +('firstwiki','http://firstwiki.org/index.php/$1',0), +('foldoc','http://www.foldoc.org/foldoc/foldoc.cgi?$1',0), +('foxwiki','http://fox.wikis.com/wc.dll?Wiki~$1',0), +('fr.be','http://fr.wikinations.be/$1',0), +('fr.ca','http://fr.ca.wikinations.org/$1',0), +('fr.fr','http://fr.fr.wikinations.org/$1',0), +('fr.org','http://fr.wikinations.org/$1',0), +('freebsdman','http://www.FreeBSD.org/cgi/man.cgi?apropos=1&query=$1',0), +('gamewiki','http://gamewiki.org/wiki/index.php/$1',0), +('gej','http://www.esperanto.de/cgi-bin/aktivikio/wiki.pl?$1',0), +('gentoo-wiki','http://gentoo-wiki.com/$1',0), +('globalvoices','http://cyber.law.harvard.edu/dyn/globalvoices/wiki/$1',0), +('gmailwiki','http://www.gmailwiki.com/index.php/$1',0), 
+('google','http://www.google.com/search?q=$1',0), +('googlegroups','http://groups.google.com/groups?q=$1',0), +('gotamac','http://www.got-a-mac.org/$1',0), +('greencheese','http://www.greencheese.org/$1',0), +('hammondwiki','http://www.dairiki.org/HammondWiki/index.php3?$1',0), +('haribeau','http://wiki.haribeau.de/cgi-bin/wiki.pl?$1',0), +('hewikisource','http://he.wikisource.org/wiki/$1',1), +('herzkinderwiki','http://www.herzkinderinfo.de/Mediawiki/index.php/$1',0), +('hrwiki','http://www.hrwiki.org/index.php/$1',0), +('iawiki','http://www.IAwiki.net/$1',0), +('imdb','http://us.imdb.com/Title?$1',0), +('infosecpedia','http://www.infosecpedia.org/pedia/index.php/$1',0), +('jargonfile','http://sunir.org/apps/meta.pl?wiki=JargonFile&redirect=$1',0), +('jefo','http://www.esperanto-jeunes.org/vikio/index.php?$1',0), +('jiniwiki','http://www.cdegroot.com/cgi-bin/jini?$1',0), +('jspwiki','http://www.ecyrd.com/JSPWiki/Wiki.jsp?page=$1',0), +('kerimwiki','http://wiki.oxus.net/$1',0), +('kmwiki','http://www.voght.com/cgi-bin/pywiki?$1',0), +('knowhow','http://www2.iro.umontreal.ca/~paquetse/cgi-bin/wiki.cgi?$1',0), +('lanifexwiki','http://opt.lanifex.com/cgi-bin/wiki.pl?$1',0), +('lasvegaswiki','http://wiki.gmnow.com/index.php/$1',0), +('linuxwiki','http://www.linuxwiki.de/$1',0), +('lojban','http://www.lojban.org/tiki/tiki-index.php?page=$1',0), +('lqwiki','http://wiki.linuxquestions.org/wiki/$1',0), +('lugkr','http://lug-kr.sourceforge.net/cgi-bin/lugwiki.pl?$1',0), +('lutherwiki','http://www.lutheranarchives.com/mw/index.php/$1',0), +('mathsongswiki','http://SeedWiki.com/page.cfm?wikiid=237&doc=$1',0), +('mbtest','http://www.usemod.com/cgi-bin/mbtest.pl?$1',0), +('meatball','http://www.usemod.com/cgi-bin/mb.pl?$1',0), +('mediazilla','http://bugzilla.wikipedia.org/$1',1), +('memoryalpha','http://www.memory-alpha.org/en/index.php/$1',0), +('metaweb','http://www.metaweb.com/wiki/wiki.phtml?title=$1',0), +('metawiki','http://sunir.org/apps/meta.pl?$1',0), 
+('metawikipedia','http://meta.wikimedia.org/wiki/$1',0), +('moinmoin','http://purl.net/wiki/moin/$1',0), +('mozillawiki','http://wiki.mozilla.org/index.php/$1',0), +('muweb','http://www.dunstable.com/scripts/MuWebWeb?$1',0), +('netvillage','http://www.netbros.com/?$1',0), +('oeis','http://www.research.att.com/cgi-bin/access.cgi/as/njas/sequences/eisA.cgi?Anum=$1',0), +('openfacts','http://openfacts.berlios.de/index.phtml?title=$1',0), +('openwiki','http://openwiki.com/?$1',0), +('opera7wiki','http://nontroppo.org/wiki/$1',0), +('orgpatterns','http://www.bell-labs.com/cgi-user/OrgPatterns/OrgPatterns?$1',0), +('osi reference model','http://wiki.tigma.ee/',0), +('pangalacticorg','http://www.pangalactic.org/Wiki/$1',0), +('personaltelco','http://www.personaltelco.net/index.cgi/$1',0), +('patwiki','http://gauss.ffii.org/$1',0), +('phpwiki','http://phpwiki.sourceforge.net/phpwiki/index.php?$1',0), +('pikie','http://pikie.darktech.org/cgi/pikie?$1',0), +('pmeg','http://www.bertilow.com/pmeg/$1.php',0), +('ppr','http://c2.com/cgi/wiki?$1',0), +('purlnet','http://purl.oclc.org/NET/$1',0), +('pythoninfo','http://www.python.org/cgi-bin/moinmoin/$1',0), +('pythonwiki','http://www.pythonwiki.de/$1',0), +('pywiki','http://www.voght.com/cgi-bin/pywiki?$1',0), +('raec','http://www.raec.clacso.edu.ar:8080/raec/Members/raecpedia/$1',0), +('revo','http://purl.org/NET/voko/revo/art/$1.html',0), +('rfc','http://www.rfc-editor.org/rfc/rfc$1.txt',0), +('s23wiki','http://is-root.de/wiki/index.php/$1',0), +('scoutpedia','http://www.scoutpedia.info/index.php/$1',0), +('seapig','http://www.seapig.org/$1',0), +('seattlewiki','http://seattlewiki.org/wiki/$1',0), +('seattlewireless','http://seattlewireless.net/?$1',0), +('seeds','http://www.IslandSeeds.org/wiki/$1',0), +('senseislibrary','http://senseis.xmp.net/?$1',0), +('shakti','http://cgi.algonet.se/htbin/cgiwrap/pgd/ShaktiWiki/$1',0), +('slashdot','http://slashdot.org/article.pl?sid=$1',0), 
+('smikipedia','http://www.smikipedia.org/$1',0), +('sockwiki','http://wiki.socklabs.com/$1',0), +('sourceforge','http://sourceforge.net/$1',0), +('squeak','http://minnow.cc.gatech.edu/squeak/$1',0), +('strikiwiki','http://ch.twi.tudelft.nl/~mostert/striki/teststriki.pl?$1',0), +('susning','http://www.susning.nu/$1',0), +('svgwiki','http://www.protocol7.com/svg-wiki/default.asp?$1',0), +('tavi','http://tavi.sourceforge.net/$1',0), +('tejo','http://www.tejo.org/vikio/$1',0), +('terrorwiki','http://www.liberalsagainstterrorism.com/wiki/index.php/$1',0), +('tmbw','http://www.tmbw.net/wiki/index.php/$1',0), +('tmnet','http://www.technomanifestos.net/?$1',0), +('tmwiki','http://www.EasyTopicMaps.com/?page=$1',0), +('turismo','http://www.tejo.org/turismo/$1',0), +('theopedia','http://www.theopedia.com/$1',0), +('twiki','http://twiki.org/cgi-bin/view/$1',0), +('twistedwiki','http://purl.net/wiki/twisted/$1',0), +('uea','http://www.tejo.org/uea/$1',0), +('unreal','http://wiki.beyondunreal.com/wiki/$1',0), +('ursine','http://ursine.ca/$1',0), +('usej','http://www.tejo.org/usej/$1',0), +('usemod','http://www.usemod.com/cgi-bin/wiki.pl?$1',0), +('visualworks','http://wiki.cs.uiuc.edu/VisualWorks/$1',0), +('warpedview','http://www.warpedview.com/index.php/$1',0), +('webdevwikinl','http://www.promo-it.nl/WebDevWiki/index.php?page=$1',0), +('webisodes','http://www.webisodes.org/$1',0), +('webseitzwiki','http://webseitz.fluxent.com/wiki/$1',0), +('why','http://clublet.com/c/c/why?$1',0), +('wiki','http://c2.com/cgi/wiki?$1',0), +('wikia','http://www.wikia.com/wiki/index.php/$1',0), +('wikibooks','http://en.wikibooks.org/wiki/$1',1), +('wikicities','http://www.wikicities.com/index.php/$1',0), +('wikif1','http://www.wikif1.org/$1',0), +('wikinfo','http://www.wikinfo.org/wiki.php?title=$1',0), +('wikimedia','http://wikimediafoundation.org/wiki/$1',0), +('wikiquote','http://en.wikiquote.org/wiki/$1',1), +('wikinews','http://en.wikinews.org/wiki/$1',0), 
+('wikisource','http://sources.wikipedia.org/wiki/$1',1), +('wikispecies','http://species.wikipedia.org/wiki/$1',1), +('wikitravel','http://wikitravel.org/en/$1',0), +('wikiworld','http://WikiWorld.com/wiki/index.php/$1',0), +('wiktionary','http://en.wiktionary.org/wiki/$1',1), +('wlug','http://www.wlug.org.nz/$1',0), +('wlwiki','http://winslowslair.supremepixels.net/wiki/index.php/$1',0), +('ypsieyeball','http://sknkwrks.dyndns.org:1957/writewiki/wiki.pl?$1',0), +('zwiki','http://www.zwiki.org/$1',0), +('zzz wiki','http://wiki.zzz.ee/',0), +('wikt','http://en.wiktionary.org/wiki/$1',1); + diff --git a/maintenance/lang2po.php b/maintenance/lang2po.php new file mode 100644 index 00000000..af6bceea --- /dev/null +++ b/maintenance/lang2po.php @@ -0,0 +1,154 @@ +<?php +/** + * Convert Language files to .po files ! + * + * Todo: + * - generate .po header + * - fix escaping of \ + */ + +/** This is a command line script */ +require_once('commandLine.inc'); +require_once('languages.inc'); + +define('ALL_LANGUAGES', true); +define('XGETTEXT_BIN', 'xgettext'); +define('MSGMERGE_BIN', 'msgmerge'); + +// used to generate the .pot +define('XGETTEXT_OPTIONS', '-n --keyword=wfMsg --keyword=wfMsgForContent --keyword=wfMsgHtml --keyword=wfMsgWikiHtml '); +define('MSGMERGE_OPTIONS', ' -v '); + +define('LOCALE_OUTPUT_DIR', $IP.'/locale'); + + +if( isset($options['help']) ) { usage(); wfDie(); } +// default output is WikiText +if( !isset($options['lang']) ) { $options['lang'] = ALL_LANGUAGES; } + +function usage() { +print <<<END +Usage: php lang2po.php [--help] [--lang=<langcode>] [--stdout] + --help: this message. + --lang: a lang code you want to generate a .po for (default: all languages). + +END; +} + + +/** + * Return a dummy header for later edition. + * @return string A dummy header + */ +function poHeader() { +return +'# SOME DESCRIPTIVE TITLE. +# Copyright (C) 2005 MediaWiki +# This file is distributed under the same license as the MediaWiki package. 
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"Report-Msgid-Bugs-To: bugzilllaaaaa\n" +"POT-Creation-Date: 2005-08-16 20:13+0200\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: VARIOUS <nobody>\n" +"Language-Team: LANGUAGE <nobody>\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" +'; +} + +/** + * generate and write a file in .po format. + * + * @param string $langcode Code of a language it will process. + * @param array &$messages Array containing the various messages. + * @return string Filename where stuff got saved or false. + */ +function generatePo($langcode, &$messages) { + $data = poHeader(); + + // Generate .po entries + foreach($messages as $identifier => $content) { + $data .= "msgid \"$identifier\"\n"; + + // Escape backslashes + $tmp = str_replace('\\', '\\\\', $content); + // Escape doublelquotes + $tmp = preg_replace( "/(?<!\\\\)\"/", '\"', $tmp); + // Rewrite multilines to gettext format + $tmp = str_replace("\n", "\"\n\"", $tmp); + + $data .= 'msgstr "'. $tmp . "\"\n\n"; + } + + // Write the content to a file in locale/XX/messages.po + $dir = LOCALE_OUTPUT_DIR.'/'.$langcode; + if( !is_dir($dir) ) { mkdir( $dir, 0770 ); } + $filename = $dir.'/fromlanguagefile.po'; + + $file = fopen( $filename , 'wb' ); + if( fwrite( $file, $data ) ) { + fclose( $file ); + return $filename; + } else { + fclose( $file ); + return false; + } +} + +function generatePot() { + global $IP; + $curdir = getcwd(); + chdir($IP); + exec( XGETTEXT_BIN + .' '.XGETTEXT_OPTIONS + .' -o '.LOCALE_OUTPUT_DIR.'/wfMsg.pot' + .' 
includes/*php' + ); + chdir($curdir); +} + +function applyPot($langcode) { + $langdir = LOCALE_OUTPUT_DIR.'/'.$langcode; + + $from = $langdir.'/fromlanguagefile.po'; + $pot = LOCALE_OUTPUT_DIR.'/wfMsg.pot'; + $dest = $langdir.'/messages.po'; + + // Merge template and generate file to get final .po + exec(MSGMERGE_BIN.MSGMERGE_OPTIONS." $from $pot -o $dest "); + // delete no more needed file +// unlink($from); +} + +// Generate a template .pot based on source tree +echo "Getting 'gettext' default messages from sources:"; +generatePot(); +echo "done.\n"; + + +$langTool = new languages(); + +// Do all languages +foreach ( $langTool->getList() as $langcode) { + echo "Loading messages for $langcode:\t"; + require_once( 'languages/Language' . $langcode . '.php' ); + $arr = 'wgAllMessages'.$langcode; + if(!@is_array($$arr)) { + echo "NONE FOUND\n"; + } else { + echo "ok\n"; + if( ! generatePo($langcode, $$arr) ) { + echo "ERROR: Failed to wrote file.\n"; + } else { + echo "Applying template:"; + applyPot($langcode); + } + } +} +?> diff --git a/maintenance/langmemusage.php b/maintenance/langmemusage.php new file mode 100644 index 00000000..d45de0e4 --- /dev/null +++ b/maintenance/langmemusage.php @@ -0,0 +1,30 @@ +<?php +/** + * Dumb program that tries to get the memory usage + * for each language file. + */ + +/** This is a command line script */ +require_once('commandLine.inc'); +require_once('languages.inc'); + +$langtool = new languages(); + +if ( ! 
function_exists( 'memory_get_usage' ) ) + wfDie( "You must compile PHP with --enable-memory-limit\n" ); + +$memlast = $memstart = memory_get_usage(); + +print 'Base memory usage: '.$memstart."\n"; + +foreach($langtool->getList() as $langcode) { + require_once('languages/Language'.$langcode.'.php'); + $memstep = memory_get_usage(); + printf( "%12s: %d\n", $langcode, ($memstep- $memlast) ); + $memlast = $memstep; +} + +$memend = memory_get_usage(); + +echo ' Total Usage: '.($memend - $memstart)."\n"; +?> diff --git a/maintenance/languages.inc b/maintenance/languages.inc new file mode 100644 index 00000000..e318259d --- /dev/null +++ b/maintenance/languages.inc @@ -0,0 +1,48 @@ +<?php +/** + * Library to grab data from languages files + * + * WORK IN PROGRESS. There is some bugs when including the same + * file multiple time :((( + */ +require_once('commandLine.inc'); + +class languages { + /** Contain the list of languages available */ + var $list = array(); + /** some messages for the current lang */ + var $messages = array(); + + function languages() { + $this->clear(); + $this->loadList(); + } + + function clear() { + $this->list = array(); + $this->messages = array(); + } + + function loadList() { + global $IP; + $this->list = array(); + + // available language files + $dir = opendir("$IP/languages"); + while ($file = readdir($dir)) { + if (preg_match("/Language([^.]*?)\.php$/", $file, $m)) { + $this->list[] = $m[1]; + } + } + sort($this->list); + + // Cleanup file list + foreach($this->list as $key => $lang) { + if ($lang == 'Utf8' || $lang == '' || $lang == 'Converter') + unset($this->list[$key]); + } + } + + function getList() { return $this->list; } +} +?> diff --git a/maintenance/mcc.php b/maintenance/mcc.php new file mode 100644 index 00000000..93b6ec18 --- /dev/null +++ b/maintenance/mcc.php @@ -0,0 +1,173 @@ +<?php +/** + * memcached diagnostic tool + * + * @todo document + * @package MediaWiki + * @subpackage Maintenance + */ + +/** */ +require_once( 
'commandLine.inc' ); +require_once( 'memcached-client.php' ); + +$mcc = new memcached( array('persistant' => true/*, 'debug' => true*/) ); +$mcc->set_servers( $wgMemCachedServers ); +#$mcc->set_debug( true ); + +function mccShowHelp($command) { + + if(! $command ) { $command = 'fullhelp'; } + $onlyone = true; + + switch ( $command ) { + + case 'fullhelp': + // will show help for all commands + $onlyone = false; + + case 'get': + print "get: grabs something\n"; + if($onlyone) { break; } + + case 'getsock': + print "getsock: lists sockets\n"; + if($onlyone) { break; } + + case 'set': + print "set: changes something\n"; + if($onlyone) { break; } + + case 'delete': + print "delete: deletes something\n"; + if($onlyone) { break; } + + case 'history': + print "history: show command line history\n"; + if($onlyone) { break; } + + case 'server': + print "server: show current memcached server\n"; + if($onlyone) { break; } + + case 'dumpmcc': + print "dumpmcc: shows the whole thing\n"; + if($onlyone) { break; } + + case 'exit': + case 'quit': + print "exit or quit: exit mcc\n"; + if($onlyone) { break; } + + case 'help': + print "help: help about a command\n"; + if($onlyone) { break; } + + default: + if($onlyone) { + print "$command: command does not exist or no help for it\n"; + } + } +} + +do { + $bad = false; + $showhelp = false; + $quit = false; + + $line = readconsole( '> ' ); + if ($line === false) exit; + + $args = explode( ' ', $line ); + $command = array_shift( $args ); + + // process command + switch ( $command ) { + case 'help': + // show an help message + mccShowHelp(array_shift($args)); + break; + + case 'get': + print "Getting {$args[0]}[{$args[1]}]\n"; + $res = $mcc->get( $args[0] ); + if ( array_key_exists( 1, $args ) ) { + $res = $res[$args[1]]; + } + if ( $res === false ) { + #print 'Error: ' . $mcc->error_string() . 
"\n"; + print "MemCached error\n"; + } elseif ( is_string( $res ) ) { + print "$res\n"; + } else { + var_dump( $res ); + } + break; + + case 'getsock': + $res = $mcc->get( $args[0] ); + $sock = $mcc->get_sock( $args[0] ); + var_dump( $sock ); + break; + + case 'server': + $res = $mcc->get( $args[0] ); + print $mcc->_buckets[$mcc->_hashfunc( $args[0] ) % $mcc->_bucketcount] . "\n"; + break; + + case 'set': + $key = array_shift( $args ); + if ( $args[0] == "#" && is_numeric( $args[1] ) ) { + $value = str_repeat( '*', $args[1] ); + } else { + $value = implode( ' ', $args ); + } + if ( !$mcc->set( $key, $value, 0 ) ) { + #print 'Error: ' . $mcc->error_string() . "\n"; + print "MemCached error\n"; + } + break; + + case 'delete': + $key = implode( ' ', $args ); + if ( !$mcc->delete( $key ) ) { + #print 'Error: ' . $mcc->error_string() . "\n"; + print "MemCached error\n"; + } + break; + + case 'history': + if ( function_exists( 'readline_list_history' ) ) { + foreach( readline_list_history() as $num => $line) { + print "$num: $line\n"; + } + } else { + print "readline_list_history() not available\n"; + } + break; + + case 'dumpmcc': + var_dump( $mcc ); + break; + + case 'quit': + case 'exit': + $quit = true; + break; + + default: + $bad = true; + } // switch() end + + if ( $bad ) { + if ( $command ) { + print "Bad command\n"; + } + } else { + if ( function_exists( 'readline_add_history' ) ) { + readline_add_history( $line ); + } + } +} while ( !$quit ); + +?> diff --git a/maintenance/mctest.php b/maintenance/mctest.php new file mode 100644 index 00000000..95249b29 --- /dev/null +++ b/maintenance/mctest.php @@ -0,0 +1,59 @@ +<?php +/* $Id: mctest.php 12896 2006-01-28 08:22:24Z timstarling $ */ + +$optionsWithArgs = array( 'i' ); + +require_once('commandLine.inc'); + +#$wgDebugLogFile = '/dev/stdout'; + +if ( isset( $args[0] ) ) { + $wgMemCachedServers = array( $args[0] ); +} else { + $wgMemCachedServers[] = 'localhost'; +} +if ( isset( $options['i'] ) ) { + $iterations = 
$options['i']; +} else { + $iterations = 100; +} + +foreach ( $wgMemCachedServers as $server ) { + print "$server "; + $mcc = new MemCachedClientforWiki( array('persistant' => true) ); + $mcc->set_servers( array( $server ) ); + $set = 0; + $incr = 0; + $get = 0; + $time_start=microtime_float(); + for ( $i=1; $i<=$iterations; $i++ ) { + if ( !is_null( $mcc->set( "test$i", $i ) ) ) { + $set++; + } + } + + for ( $i=1; $i<=$iterations; $i++ ) { + if ( !is_null( $mcc->incr( "test$i", $i ) ) ) { + $incr++; + } + } + + for ( $i=1; $i<=$iterations; $i++ ) { + $value = $mcc->get( "test$i" ); + if ( $value == $i*2 ) { + $get++; + } + } + $exectime=microtime_float()-$time_start; + + print "set: $set incr: $incr get: $get time: $exectime\n"; +} + +function microtime_float() +{ + list($usec, $sec) = explode(" ", microtime()); + return ((float)$usec + (float)$sec); +} + + +?> diff --git a/maintenance/moveBatch.php b/maintenance/moveBatch.php new file mode 100644 index 00000000..8d7141cd --- /dev/null +++ b/maintenance/moveBatch.php @@ -0,0 +1,85 @@ +<?php + +# Move a batch of pages +# Usage: php moveBatch.php [-u <user>] [-r <reason>] [-i <interval>] <listfile> +# where +# <listfile> is a file where each line has two titles separated by a pipe +# character. The first title is the source, the second is the destination. 
+# <user> is the username +# <reason> is the move reason +# <interval> is the number of seconds to sleep for after each move + +$oldCwd = getcwd(); +$optionsWithArgs = array( 'u', 'r', 'i' ); +require_once( 'commandLine.inc' ); + +chdir( $oldCwd ); + +# Options processing + +$filename = 'php://stdin'; +$user = 'Move page script'; +$reason = ''; +$interval = 0; + +if ( isset( $args[0] ) ) { + $filename = $args[0]; +} +if ( isset( $options['u'] ) ) { + $user = $options['u']; +} +if ( isset( $options['r'] ) ) { + $reason = $options['r']; +} +if ( isset( $options['i'] ) ) { + $interval = $options['i']; +} + +$wgUser = User::newFromName( $user ); + + +# Setup complete, now start + +$file = fopen( $filename, 'r' ); +if ( !$file ) { + print "Unable to read file, exiting\n"; + exit; +} + +$dbw =& wfGetDB( DB_MASTER ); + +for ( $linenum = 1; !feof( $file ); $linenum++ ) { + $line = fgets( $file ); + if ( $line === false ) { + break; + } + $parts = array_map( 'trim', explode( '|', $line ) ); + if ( count( $parts ) != 2 ) { + print "Error on line $linenum, no pipe character\n"; + continue; + } + $source = Title::newFromText( $parts[0] ); + $dest = Title::newFromText( $parts[1] ); + if ( is_null( $source ) || is_null( $dest ) ) { + print "Invalid title on line $linenum\n"; + continue; + } + + + print $source->getPrefixedText(); + $dbw->begin(); + $err = $source->moveTo( $dest, false, $reason ); + if( $err !== true ) { + print "\nFAILED: $err"; + } + $dbw->immediateCommit(); + print "\n"; + + if ( $interval ) { + sleep( $interval ); + } + wfWaitForSlaves( 5 ); +} + + +?> diff --git a/maintenance/mwdocgen.php b/maintenance/mwdocgen.php new file mode 100644 index 00000000..de1a7d96 --- /dev/null +++ b/maintenance/mwdocgen.php @@ -0,0 +1,205 @@ +<?php +/** + * Script to easily generate the mediawiki documentation using doxygen. + * + * By default it will generate the whole documentation but you will be able to + * generate just some parts. 
+ * + * Usage: + * php mwdocgen.php + * + * Then make a selection from the menu + * + * KNOWN BUGS: + * + * - pass_thru seems to always use buffering (even with ob_implicit_flush()), + * that make output slow when doxygen parses language files. + * - the menu doesnt work, got disabled at revision 13740. Need to code it. + * + * + * @todo document + * @package MediaWiki + * @subpackage Maintenance + * + * @author Ashar Voultoiz <thoane@altern.org> + * @version first release + */ + +# +# Variables / Configuration +# + +if( php_sapi_name() != 'cli' ) { + echo 'Run me from the command line.'; + die( -1 ); +} + +/** Figure out the base directory for MediaWiki location */ +$mwPath = dirname( dirname( __FILE__ ) ) . DIRECTORY_SEPARATOR; + +/** Global variable: temporary directory */ +$tmpPath = '/tmp/'; + +/** doxygen binary script */ +$doxygenBin = 'doxygen'; + +/** doxygen configuration template for mediawiki */ +$doxygenTemplate = $mwPath . 'maintenance/Doxyfile'; + +/** where Phpdoc should output documentation */ +#$doxyOutput = '/var/www/mwdoc/'; +$doxyOutput = $mwPath . 'docs' . DIRECTORY_SEPARATOR ; + +/** MediaWiki subpaths */ +$mwPathI = $mwPath.'includes/'; +$mwPathL = $mwPath.'languages/'; +$mwPathM = $mwPath.'maintenance/'; +$mwPathS = $mwPath.'skins/'; + +/** Variable to get user input */ +$input = ''; + +/** shell command that will be run */ +$command = $doxygenBin; + +# +# Functions +# + +function readaline( $prompt = '') { + print $prompt; + $fp = fopen( "php://stdin", "r" ); + $resp = trim( fgets( $fp, 1024 ) ); + fclose( $fp ); + return $resp; + } + +/** + * Generate a configuration file given user parameters and return the temporary filename. + * @param $doxygenTemplate String: full path for the template. + * @param $outputDirectory String: directory where the stuff will be output. + * @param $stripFromPath String: path that should be stripped out (usually mediawiki base path). + * @param $input String: Path to analyze. 
+ */ +function generateConfigFile($doxygenTemplate, $outputDirectory, $stripFromPath, $input) { + global $tmpPath ; + + $template = file_get_contents($doxygenTemplate); + + // Replace template placeholders by correct values. + $tmpCfg = str_replace( + array( + '{{OUTPUT_DIRECTORY}}', + '{{STRIP_FROM_PATH}}', + '{{INPUT}}', + ), + array( + $outputDirectory, + $stripFromPath, + $input, + ), + $template + ); + $tmpFileName = $tmpPath . 'mwdocgen'. rand() .'.tmp'; + file_put_contents( $tmpFileName , $tmpCfg ) or die("Could not write doxygen configuration to file $tmpFileName\n"); + + return $tmpFileName; +} + +# +# Main ! +# + +unset( $file ); + +if( is_array( $argv ) && isset( $argv[1] ) ) { + switch( $argv[1] ) { + case '--all': $input = 0; break; + case '--includes': $input = 1; break; + case '--languages': $input = 2; break; + case '--maintenance': $input = 3; break; + case '--skins': $input = 4; break; + case '--file': + $input = 5; + if( isset( $argv[2] ) ) { + $file = $argv[2]; + } + break; + } +} + +if( $input === '' ) { +?>Several documentation possibilities: + 0 : whole documentation (1 + 2 + 3 + 4) + 1 : only includes + 2 : only languages + 3 : only maintenance + 4 : only skins + 5 : only a given file<?php + while ( !is_numeric($input) ) + { + $input = readaline( "\nEnter your choice [0]:" ); + if($input == '') { + $input = 0; + } + } +} +/* +switch ($input) { +case 0: + $command .= " -f $mwBaseFiles -d $mwPathI,$mwPathL,$mwPathM,$mwPathS"; + break; +case 1: + $command .= "-d $mwPathI"; + break; +case 2: + $command .= "-d $mwPathL"; + break; +case 3: + $command .= "-d $mwPathM"; + break; +case 4: + $command .= "-d $mwPathS"; + break; +case 5: + if( !isset( $file ) ) { + $file = readaline("Enter file name $mwPath"); + } + $command .= ' -f '.$mwPath.$file; +} + +$command .= " -t $pdOutput ".$pdOthers; + +*/ + +// TODO : generate a list of paths )) +$input = $mwPath; + +$generatedConf = generateConfigFile($doxygenTemplate, $doxyOutput, $mwPath, $input ); 
+$command = $doxygenBin . ' ' . $generatedConf ; + +?> +--------------------------------------------------- +Launching the command: + +<?php echo $command ?> + +--------------------------------------------------- +<?php + +passthru($command); + +?> +--------------------------------------------------- +Doxygen execution finished. +Check above for possible errors. + +You might want to deleted the temporary file <?php echo $generatedConf; ?> + +<?php + +# phpdoc -d ./mediawiki/includes/ ./mediawiki/maintenance/ -f ./mediawiki/*php -t ./mwdoc/ -dn 'MediaWiki' --title 'MediaWiki generated documentation' -o 'HTML:frames:DOM/earthli' + +# phpdoc -f ./mediawiki/includes/GlobalFunctions.php -t ./mwdoc/ -dn 'MediaWiki' --title 'MediaWiki generated documentation' -o 'HTML:frames:DOM/earthli' + +?> diff --git a/maintenance/mwdoxygen.cfg b/maintenance/mwdoxygen.cfg new file mode 100644 index 00000000..39fae228 --- /dev/null +++ b/maintenance/mwdoxygen.cfg @@ -0,0 +1,1136 @@ +# Doxyfile 1.4.3-20050530 + +# +# NOTE: this configuration assume you are running doxygen from the +# mediawiki root directory. For example: +# ~/dev/mediawiki-HEAD/ +# The easiest way is to get in the maintenance directory and then: +# make doxydoc +# +# Paths visited are configured by the INPUT variable (around line 450) + +# This file describes the settings to be used by the documentation system +# doxygen (www.doxygen.org) for the MediaWiki project +# +# All text after a hash (#) is considered a comment and will be ignored +# The format is: +# TAG = value [value, ...] +# For lists items can also be appended using: +# TAG += value [value, ...] 
+# Values that contain spaces should be placed between quotes (" ") + +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- + +PROJECT_NAME = MediaWiki +PROJECT_NUMBER = 1.6-cvs +OUTPUT_DIRECTORY = docs + +# 2 levels directories, create 4096 of them! +CREATE_SUBDIRS = NO + +# The OUTPUT_LANGUAGE tag is used to specify the language in which all +# documentation generated by doxygen is written. Doxygen will use this +# information to generate all constant output in the proper language. +# The default language is English, other supported languages are: +# Brazilian, Catalan, Chinese, Chinese-Traditional, Croatian, Czech, Danish, +# Dutch, Finnish, French, German, Greek, Hungarian, Italian, Japanese, +# Japanese-en (Japanese with English messages), Korean, Korean-en, Norwegian, +# Polish, Portuguese, Romanian, Russian, Serbian, Slovak, Slovene, Spanish, +# Swedish, and Ukrainian. + +OUTPUT_LANGUAGE = English + +USE_WINDOWS_ENCODING = NO + +BRIEF_MEMBER_DESC = YES + +# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend +# the brief description of a member or function before the detailed description. +# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the +# brief descriptions will be completely suppressed. +REPEAT_BRIEF = YES + +# This tag implements a quasi-intelligent brief description abbreviator +# that is used to form the text in various listings. Each string +# in this list, if found as the leading text of the brief description, will be +# stripped from the text and the result after processing the whole list, is +# used as the annotated text. Otherwise, the brief description is used as-is. 
+# If left blank, the following values are used ("$name" is automatically +# replaced with the name of the entity): "The $name class" "The $name widget" +# "The $name file" "is" "provides" "specifies" "contains" +# "represents" "a" "an" "the" +ABBREVIATE_BRIEF = + +# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then +# Doxygen will generate a detailed section even if there is only a brief +# description. +ALWAYS_DETAILED_SEC = YES + +# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all +# inherited members of a class in the documentation of that class as if those +# members were ordinary class members. Constructors, destructors and assignment +# operators of the base classes will not be shown. +INLINE_INHERITED_MEMB = NO + +# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full +# path before files name in the file list and in the header files. If set +# to NO the shortest path that makes the file name unique will be used. +FULL_PATH_NAMES = YES + +# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag +# can be used to strip a user-defined part of the path. Stripping is +# only done if one of the specified strings matches the left-hand part of +# the path. The tag can be used to show relative paths in the file list. +# If left blank the directory from which doxygen is run is used as the +# path to strip. +STRIP_FROM_PATH = + +# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of +# the path mentioned in the documentation of a class, which tells +# the reader which header file to include in order to use a class. +# If left blank only the name of the header file containing the class +# definition is used. Otherwise one should specify the include paths that +# are normally passed to the compiler using the -I flag. +STRIP_FROM_INC_PATH = + +# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter +# (but less readable) file names. 
This can be useful if your file system +# doesn't support long names like on DOS, Mac, or CD-ROM. +SHORT_NAMES = NO + +# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen +# will interpret the first line (until the first dot) of a JavaDoc-style +# comment as the brief description. If set to NO, the JavaDoc +# comments will behave just like the Qt-style comments (thus requiring an +# explicit @brief command for a brief description). +JAVADOC_AUTOBRIEF = YES + +# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen +# treat a multi-line C++ special comment block (i.e. a block of //! or /// +# comments) as a brief description. This used to be the default behaviour. +# The new default is to treat a multi-line C++ comment block as a detailed +# description. Set this tag to YES if you prefer the old behaviour instead. +MULTILINE_CPP_IS_BRIEF = NO + +# If the DETAILS_AT_TOP tag is set to YES then Doxygen +# will output the detailed description near the top, like JavaDoc. +# If set to NO, the detailed description appears after the member +# documentation. +DETAILS_AT_TOP = YES + +# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented +# member inherits the documentation from any documented member that it +# re-implements. +INHERIT_DOCS = YES + +# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC +# tag is set to YES, then doxygen will reuse the documentation of the first +# member in the group (if any) for the other members of the group. By default +# all members of a group must be documented explicitly. +DISTRIBUTE_GROUP_DOC = NO + +# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce +# a new page for each member. If set to NO, the documentation of a member will +# be part of the file/class/namespace that contains it. +SEPARATE_MEMBER_PAGES = NO + +# The TAB_SIZE tag can be used to set the number of spaces in a tab.
+# Doxygen uses this value to replace tabs by spaces in code fragments. +TAB_SIZE = 4 + +# This tag can be used to specify a number of aliases that act +# as commands in the documentation. An alias has the form "name=value". +# For example adding "sideeffect=\par Side Effects:\n" will allow you to +# put the command \sideeffect (or @sideeffect) in the documentation, which +# will result in a user-defined paragraph with heading "Side Effects:". +# You can put \n's in the value part of an alias to insert newlines. +ALIASES = + + +OPTIMIZE_OUTPUT_FOR_C = NO +OPTIMIZE_OUTPUT_JAVA = NO + +# Set the SUBGROUPING tag to YES (the default) to allow class member groups of +# the same type (for instance a group of public functions) to be put as a +# subgroup of that type (e.g. under the Public Functions section). Set it to +# NO to prevent subgrouping. Alternatively, this can be done per class using +# the \nosubgrouping command. +SUBGROUPING = YES + + + +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- + +# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in +# documentation are documented, even if no documentation was available. +# Private class members and static file members will be hidden unless +# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES +EXTRACT_ALL = YES + +# If the EXTRACT_PRIVATE tag is set to YES all private members of a class +# will be included in the documentation. +EXTRACT_PRIVATE = YES + +# If the EXTRACT_STATIC tag is set to YES all static members of a file +# will be included in the documentation. +EXTRACT_STATIC = YES + +# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) +# defined locally in source files will be included in the documentation. +# If set to NO only classes defined in header files are included.
+EXTRACT_LOCAL_CLASSES = YES + +# This flag is only useful for Objective-C code. When set to YES local +# methods, which are defined in the implementation section but not in +# the interface are included in the documentation. +# If set to NO (the default) only methods in the interface are included. +EXTRACT_LOCAL_METHODS = NO + +# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all +# undocumented members of documented classes, files or namespaces. +# If set to NO (the default) these members will be included in the +# various overviews, but no documentation section is generated. +# This option has no effect if EXTRACT_ALL is enabled. +HIDE_UNDOC_MEMBERS = NO + +# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all +# undocumented classes that are normally visible in the class hierarchy. +# If set to NO (the default) these classes will be included in the various +# overviews. This option has no effect if EXTRACT_ALL is enabled. +HIDE_UNDOC_CLASSES = NO + +# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all +# friend (class|struct|union) declarations. +# If set to NO (the default) these declarations will be included in the +# documentation. +HIDE_FRIEND_COMPOUNDS = NO + +# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any +# documentation blocks found inside the body of a function. +# If set to NO (the default) these blocks will be appended to the +# function's detailed documentation block. +HIDE_IN_BODY_DOCS = NO + +# The INTERNAL_DOCS tag determines if documentation +# that is typed after a \internal command is included. If the tag is set +# to NO (the default) then the documentation will be excluded. +# Set it to YES to include the internal documentation. +INTERNAL_DOCS = NO + +# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate +# file names in lower-case letters. If set to YES upper-case letters are also +# allowed. 
This is useful if you have classes or files whose names only differ +# in case and if your file system supports case sensitive file names. Windows +# and Mac users are advised to set this option to NO. +CASE_SENSE_NAMES = YES + +# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen +# will show members with their full class and namespace scopes in the +# documentation. If set to YES the scope will be hidden. +HIDE_SCOPE_NAMES = NO + +# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen +# will put a list of the files that are included by a file in the documentation +# of that file. +SHOW_INCLUDE_FILES = YES + +# If the INLINE_INFO tag is set to YES (the default) then a tag [inline] +# is inserted in the documentation for inline members. +INLINE_INFO = YES + +# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen +# will sort the (detailed) documentation of file and class members +# alphabetically by member name. If set to NO the members will appear in +# declaration order. +SORT_MEMBER_DOCS = YES + +# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the +# brief documentation of file, namespace and class members alphabetically +# by member name. If set to NO (the default) the members will appear in +# declaration order. +SORT_BRIEF_DOCS = NO + +# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be +# sorted by fully-qualified names, including namespaces. If set to +# NO (the default), the class list will be sorted only by class name, +# not including the namespace part. +# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. +# Note: This option applies only to the class list, not to the +# alphabetical list. +SORT_BY_SCOPE_NAME = NO + +GENERATE_TODOLIST = YES +GENERATE_TESTLIST = YES +GENERATE_BUGLIST = YES +GENERATE_DEPRECATEDLIST= YES + +# The ENABLED_SECTIONS tag can be used to enable conditional +# documentation sections, marked by \if sectionname ... \endif. 
+ENABLED_SECTIONS = + +# The MAX_INITIALIZER_LINES tag determines the maximum number of lines +# the initial value of a variable or define consists of for it to appear in +# the documentation. If the initializer consists of more lines than specified +# here it will be hidden. Use a value of 0 to hide initializers completely. +# The appearance of the initializer of individual variables and defines in the +# documentation can be controlled using \showinitializer or \hideinitializer +# command in the documentation regardless of this setting. +MAX_INITIALIZER_LINES = 30 + +# Set the SHOW_USED_FILES tag to NO to disable the list of files generated +# at the bottom of the documentation of classes and structs. If set to YES the +# list will mention the files that were used to generate the documentation. +SHOW_USED_FILES = YES + +# If the sources in your project are distributed over multiple directories +# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy +# in the documentation. +SHOW_DIRECTORIES = YES + +# The FILE_VERSION_FILTER tag can be used to specify a program or script that +# doxygen should invoke to get the current version for each file (typically from the +# version control system). Doxygen will invoke the program by executing (via +# popen()) the command <command> <input-file>, where <command> is the value of +# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file +# provided by doxygen. Whatever the program writes to standard output +# is used as the file version. See the manual for examples. +FILE_VERSION_FILTER = + + + +#--------------------------------------------------------------------------- +# configuration options related to warning and progress messages +#--------------------------------------------------------------------------- + +# The QUIET tag can be used to turn on/off the messages that are generated +# by doxygen. Possible values are YES and NO. If left blank NO is used.
+QUIET = NO + +# The WARNINGS tag can be used to turn on/off the warning messages that are +# generated by doxygen. Possible values are YES and NO. If left blank +# NO is used. +WARNINGS = YES + +# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings +# for undocumented members. If EXTRACT_ALL is set to YES then this flag will +# automatically be disabled. +WARN_IF_UNDOCUMENTED = YES + +# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for +# potential errors in the documentation, such as not documenting some +# parameters in a documented function, or documenting parameters that +# don't exist or using markup commands wrongly. +WARN_IF_DOC_ERROR = YES + +# This WARN_NO_PARAMDOC option can be enabled to get warnings for +# functions that are documented, but have no documentation for their parameters +# or return value. If set to NO (the default) doxygen will only warn about +# wrong or incomplete parameter documentation, but not about the absence of +# documentation. +WARN_NO_PARAMDOC = NO + +# The WARN_FORMAT tag determines the format of the warning messages that +# doxygen can produce. The string should contain the $file, $line, and $text +# tags, which will be replaced by the file and line number from which the +# warning originated and the warning text. Optionally the format may contain +# $version, which will be replaced by the version of the file (if it could +# be obtained via FILE_VERSION_FILTER) +WARN_FORMAT = "$file:$line: $text" + +# The WARN_LOGFILE tag can be used to specify a file to which warning +# and error messages should be written. If left blank the output is written +# to stderr.
+WARN_LOGFILE = + +#--------------------------------------------------------------------------- +# configuration options related to the input files +#--------------------------------------------------------------------------- + +# The INPUT tag can be used to specify the files and/or directories that contain +# documented source files. You may enter file names like "myfile.cpp" or +# directories like "/usr/src/myproject". Separate the files or directories +# with spaces. + +# should be run from maintenance +# FIXME : includes/normal includes/templates languages are missing +INPUT = config includes maintenance skins tests + +# If the value of the INPUT tag contains directories, you can use the +# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank the following patterns are tested: +# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx +# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm + +FILE_PATTERNS = *.php *.inc + +# The RECURSIVE tag can be used to specify whether or not subdirectories +# should be searched for input files as well. Possible values are YES and NO. +# If left blank NO is used. + +RECURSIVE = NO + +# The EXCLUDE tag can be used to specify files and/or directories that should +# be excluded from the INPUT source files. This way you can easily exclude a +# subdirectory from a directory tree whose root is specified with the INPUT tag. + +EXCLUDE = + +# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or +# directories that are symbolic links (a Unix filesystem feature) are excluded +# from the input. + +EXCLUDE_SYMLINKS = NO + +# If the value of the INPUT tag contains directories, you can use the +# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude +# certain files from those directories.
+ +EXCLUDE_PATTERNS = + +# The EXAMPLE_PATH tag can be used to specify one or more files or +# directories that contain example code fragments that are included (see +# the \include command). + +EXAMPLE_PATH = + +# If the value of the EXAMPLE_PATH tag contains directories, you can use the +# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank all files are included. + +EXAMPLE_PATTERNS = + +# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be +# searched for input files to be used with the \include or \dontinclude +# commands irrespective of the value of the RECURSIVE tag. +# Possible values are YES and NO. If left blank NO is used. + +EXAMPLE_RECURSIVE = NO + +# The IMAGE_PATH tag can be used to specify one or more files or +# directories that contain image that are included in the documentation (see +# the \image command). + +IMAGE_PATH = + +# The INPUT_FILTER tag can be used to specify a program that doxygen should +# invoke to filter for each input file. Doxygen will invoke the filter program +# by executing (via popen()) the command <filter> <input-file>, where <filter> +# is the value of the INPUT_FILTER tag, and <input-file> is the name of an +# input file. Doxygen will then use the output that the filter program writes +# to standard output. If FILTER_PATTERNS is specified, this tag will be +# ignored. + +INPUT_FILTER = + +# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern +# basis. Doxygen will compare the file name with each pattern and apply the +# filter if there is a match. The filters are a list of the form: +# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further +# info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER +# is applied to all files. 
+ +FILTER_PATTERNS = + +# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using +# INPUT_FILTER) will be used to filter the input files when producing source +# files to browse (i.e. when SOURCE_BROWSER is set to YES). + +FILTER_SOURCE_FILES = NO + + + +#--------------------------------------------------------------------------- +# configuration options related to source browsing +#--------------------------------------------------------------------------- + +# If the SOURCE_BROWSER tag is set to YES then a list of source files will +# be generated. Documented entities will be cross-referenced with these sources. +# Note: To get rid of all source code in the generated output, make sure also +# VERBATIM_HEADERS is set to NO. + +SOURCE_BROWSER = NO + +# Setting the INLINE_SOURCES tag to YES will include the body +# of functions and classes directly in the documentation. + +INLINE_SOURCES = NO + +# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct +# doxygen to hide any special comment blocks from generated source code +# fragments. Normal C and C++ comments will always remain visible. + +STRIP_CODE_COMMENTS = YES + +# If the REFERENCED_BY_RELATION tag is set to YES (the default) +# then for each documented function all documented +# functions referencing it will be listed. + +REFERENCED_BY_RELATION = YES + +# If the REFERENCES_RELATION tag is set to YES (the default) +# then for each documented function all documented entities +# called/used by that function will be listed. + +REFERENCES_RELATION = YES + +# If the USE_HTAGS tag is set to YES then the references to source code +# will point to the HTML generated by the htags(1) tool instead of doxygen +# built-in source browser. The htags tool is part of GNU's global source +# tagging system (see http://www.gnu.org/software/global/global.html). You +# will need version 4.8.6 or higher. 
+ +USE_HTAGS = NO + +# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen +# will generate a verbatim copy of the header file for each class for +# which an include is specified. Set to NO to disable this. + +VERBATIM_HEADERS = YES + + +#--------------------------------------------------------------------------- +# configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- + +# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index +# of all compounds will be generated. Enable this if the project +# contains a lot of classes, structs, unions or interfaces. + +ALPHABETICAL_INDEX = NO + +# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then +# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns +# in which this list will be split (can be a number in the range [1..20]) + +COLS_IN_ALPHA_INDEX = 5 + +# In case all classes in a project start with a common prefix, all +# classes will be put under the same header in the alphabetical index. +# The IGNORE_PREFIX tag can be used to specify one or more prefixes that +# should be ignored while generating the index headers. + +IGNORE_PREFIX = + +#--------------------------------------------------------------------------- +# configuration options related to the HTML output +#--------------------------------------------------------------------------- + +# If the GENERATE_HTML tag is set to YES (the default) Doxygen will +# generate HTML output. + +GENERATE_HTML = YES + +# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `html' will be used as the default path. + +HTML_OUTPUT = html + +# The HTML_FILE_EXTENSION tag can be used to specify the file extension for +# each generated HTML page (for example: .htm,.php,.asp). 
If it is left blank +# doxygen will generate files with .html extension. + +HTML_FILE_EXTENSION = .html + +# The HTML_HEADER tag can be used to specify a personal HTML header for +# each generated HTML page. If it is left blank doxygen will generate a +# standard header. + +HTML_HEADER = + +# The HTML_FOOTER tag can be used to specify a personal HTML footer for +# each generated HTML page. If it is left blank doxygen will generate a +# standard footer. + +HTML_FOOTER = + +# The HTML_STYLESHEET tag can be used to specify a user-defined cascading +# style sheet that is used by each HTML page. It can be used to +# fine-tune the look of the HTML output. If the tag is left blank doxygen +# will generate a default style sheet. Note that doxygen will try to copy +# the style sheet file to the HTML output directory, so don't put your own +# stylesheet in the HTML output directory as well, or it will be erased! + +HTML_STYLESHEET = + +# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, +# files or namespaces will be aligned in HTML using tables. If set to +# NO a bullet list will be used. + +HTML_ALIGN_MEMBERS = YES + +# If the GENERATE_HTMLHELP tag is set to YES, additional index files +# will be generated that can be used as input for tools like the +# Microsoft HTML help workshop to generate a compressed HTML help file (.chm) +# of the generated HTML documentation. + +GENERATE_HTMLHELP = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can +# be used to specify the file name of the resulting .chm file. You +# can add a path in front of the file if the result should not be +# written to the html output directory. + +CHM_FILE = + +# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can +# be used to specify the location (absolute path including file name) of +# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run +# the HTML help compiler on the generated index.hhp. 
+ +HHC_LOCATION = + +# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag +# controls if a separate .chi index file is generated (YES) or that +# it should be included in the master .chm file (NO). + +GENERATE_CHI = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag +# controls whether a binary table of contents is generated (YES) or a +# normal table of contents (NO) in the .chm file. + +BINARY_TOC = NO + +# The TOC_EXPAND flag can be set to YES to add extra items for group members +# to the contents of the HTML help documentation and to the tree view. + +TOC_EXPAND = NO + +# The DISABLE_INDEX tag can be used to turn on/off the condensed index at +# top of each HTML page. The value NO (the default) enables the index and +# the value YES disables it. + +DISABLE_INDEX = NO + +# This tag can be used to set the number of enum values (range [1..20]) +# that doxygen will group on one line in the generated HTML documentation. + +ENUM_VALUES_PER_LINE = 4 + +# If the GENERATE_TREEVIEW tag is set to YES, a side panel will be +# generated containing a tree-like index structure (just like the one that +# is generated for HTML Help). For this to work a browser that supports +# JavaScript, DHTML, CSS and frames is required (for instance Mozilla 1.0+, +# Netscape 6.0+, Internet explorer 5.0+, or Konqueror). Windows users are +# probably better off using the HTML help feature. + +GENERATE_TREEVIEW = YES + +# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be +# used to set the initial width (in pixels) of the frame in which the tree +# is shown. + +TREEVIEW_WIDTH = 250 + +#--------------------------------------------------------------------------- +# configuration options related to the LaTeX output +#--------------------------------------------------------------------------- + +# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will +# generate Latex output. 
+ +GENERATE_LATEX = NO + +# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `latex' will be used as the default path. + +LATEX_OUTPUT = latex + +# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be +# invoked. If left blank `latex' will be used as the default command name. + +LATEX_CMD_NAME = latex + +# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to +# generate index for LaTeX. If left blank `makeindex' will be used as the +# default command name. + +MAKEINDEX_CMD_NAME = makeindex + +# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact +# LaTeX documents. This may be useful for small projects and may help to +# save some trees in general. + +COMPACT_LATEX = NO + +# The PAPER_TYPE tag can be used to set the paper type that is used +# by the printer. Possible values are: a4, a4wide, letter, legal and +# executive. If left blank a4wide will be used. + +PAPER_TYPE = a4wide + +# The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX +# packages that should be included in the LaTeX output. + +EXTRA_PACKAGES = + +# The LATEX_HEADER tag can be used to specify a personal LaTeX header for +# the generated latex document. The header should contain everything until +# the first chapter. If it is left blank doxygen will generate a +# standard header. Notice: only use this tag if you know what you are doing! + +LATEX_HEADER = + +# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated +# is prepared for conversion to pdf (using ps2pdf). The pdf file will +# contain links (just like the HTML output) instead of page references +# This makes the output suitable for online browsing using a pdf viewer. + +PDF_HYPERLINKS = NO + +# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of +# plain latex in the generated Makefile.
Set this option to YES to get a +# higher quality PDF documentation. + +USE_PDFLATEX = NO + +# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode. +# command to the generated LaTeX files. This will instruct LaTeX to keep +# running if errors occur, instead of asking the user for help. +# This option is also used when generating formulas in HTML. + +LATEX_BATCHMODE = NO + +# If LATEX_HIDE_INDICES is set to YES then doxygen will not +# include the index chapters (such as File Index, Compound Index, etc.) +# in the output. + +LATEX_HIDE_INDICES = NO + +#--------------------------------------------------------------------------- +# configuration options related to the RTF output +#--------------------------------------------------------------------------- + +# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output +# The RTF output is optimized for Word 97 and may not look very pretty with +# other RTF readers or editors. + +GENERATE_RTF = NO + +# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `rtf' will be used as the default path. + +RTF_OUTPUT = rtf + +# If the COMPACT_RTF tag is set to YES Doxygen generates more compact +# RTF documents. This may be useful for small projects and may help to +# save some trees in general. + +COMPACT_RTF = NO + +# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated +# will contain hyperlink fields. The RTF file will +# contain links (just like the HTML output) instead of page references. +# This makes the output suitable for online browsing using WORD or other +# programs which support those fields. +# Note: wordpad (write) and others do not support links. + +RTF_HYPERLINKS = NO + +# Load stylesheet definitions from file. Syntax is similar to doxygen's +# config file, i.e. a series of assignments. 
You only have to provide +# replacements, missing definitions are set to their default value. + +RTF_STYLESHEET_FILE = + +# Set optional variables used in the generation of an rtf document. +# Syntax is similar to doxygen's config file. + +RTF_EXTENSIONS_FILE = + +#--------------------------------------------------------------------------- +# configuration options related to the man page output +#--------------------------------------------------------------------------- + +# If the GENERATE_MAN tag is set to YES (the default) Doxygen will +# generate man pages + +GENERATE_MAN = NO + +# The MAN_OUTPUT tag is used to specify where the man pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `man' will be used as the default path. + +MAN_OUTPUT = man + +# The MAN_EXTENSION tag determines the extension that is added to +# the generated man pages (default is the subroutine's section .3) + +MAN_EXTENSION = .3 + +# If the MAN_LINKS tag is set to YES and Doxygen generates man output, +# then it will generate one additional man file for each entity +# documented in the real man page(s). These additional files +# only source the real man page, but without them the man command +# would be unable to find the correct page. The default is NO. + +MAN_LINKS = NO + +#--------------------------------------------------------------------------- +# configuration options related to the XML output +#--------------------------------------------------------------------------- + +# If the GENERATE_XML tag is set to YES Doxygen will +# generate an XML file that captures the structure of +# the code including all documentation. + +GENERATE_XML = NO + +# The XML_OUTPUT tag is used to specify where the XML pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `xml' will be used as the default path. 
+ +XML_OUTPUT = xml + +# The XML_SCHEMA tag can be used to specify an XML schema, +# which can be used by a validating XML parser to check the +# syntax of the XML files. + +XML_SCHEMA = + +# The XML_DTD tag can be used to specify an XML DTD, +# which can be used by a validating XML parser to check the +# syntax of the XML files. + +XML_DTD = + +# If the XML_PROGRAMLISTING tag is set to YES Doxygen will +# dump the program listings (including syntax highlighting +# and cross-referencing information) to the XML output. Note that +# enabling this will significantly increase the size of the XML output. + +XML_PROGRAMLISTING = YES + +#--------------------------------------------------------------------------- +# configuration options for the AutoGen Definitions output +#--------------------------------------------------------------------------- + +# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will +# generate an AutoGen Definitions (see autogen.sf.net) file +# that captures the structure of the code including all +# documentation. Note that this feature is still experimental +# and incomplete at the moment. + +GENERATE_AUTOGEN_DEF = NO + +#--------------------------------------------------------------------------- +# configuration options related to the Perl module output +#--------------------------------------------------------------------------- + +# If the GENERATE_PERLMOD tag is set to YES Doxygen will +# generate a Perl module file that captures the structure of +# the code including all documentation. Note that this +# feature is still experimental and incomplete at the +# moment. + +GENERATE_PERLMOD = NO + +# If the PERLMOD_LATEX tag is set to YES Doxygen will generate +# the necessary Makefile rules, Perl scripts and LaTeX code to be able +# to generate PDF and DVI output from the Perl module output. 
+ +PERLMOD_LATEX = NO + +# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be +# nicely formatted so it can be parsed by a human reader. This is useful +# if you want to understand what is going on. On the other hand, if this +# tag is set to NO the size of the Perl module output will be much smaller +# and Perl will parse it just the same. + +PERLMOD_PRETTY = YES + +# The names of the make variables in the generated doxyrules.make file +# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. +# This is useful so different doxyrules.make files included by the same +# Makefile don't overwrite each other's variables. + +PERLMOD_MAKEVAR_PREFIX = + +#--------------------------------------------------------------------------- +# Configuration options related to the preprocessor +#--------------------------------------------------------------------------- + +# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will +# evaluate all C-preprocessor directives found in the sources and include +# files. + +ENABLE_PREPROCESSING = NO + +# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro +# names in the source code. If set to NO (the default) only conditional +# compilation will be performed. Macro expansion can be done in a controlled +# way by setting EXPAND_ONLY_PREDEF to YES. + +MACRO_EXPANSION = NO + +# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES +# then the macro expansion is limited to the macros specified with the +# PREDEFINED and EXPAND_AS_DEFINED tags. + +EXPAND_ONLY_PREDEF = NO + +# If the SEARCH_INCLUDES tag is set to YES (the default) the includes files +# in the INCLUDE_PATH (see below) will be searched if a #include is found. + +SEARCH_INCLUDES = YES + +# The INCLUDE_PATH tag can be used to specify one or more directories that +# contain include files that are not input files but should be processed by +# the preprocessor.
+ +INCLUDE_PATH = + +# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard +# patterns (like *.h and *.hpp) to filter out the header-files in the +# directories. If left blank, the patterns specified with FILE_PATTERNS will +# be used. + +INCLUDE_FILE_PATTERNS = + +# The PREDEFINED tag can be used to specify one or more macro names that +# are defined before the preprocessor is started (similar to the -D option of +# gcc). The argument of the tag is a list of macros of the form: name +# or name=definition (no spaces). If the definition and the = are +# omitted =1 is assumed. To prevent a macro definition from being +# undefined via #undef or recursively expanded use the := operator +# instead of the = operator. + +PREDEFINED = + +# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then +# this tag can be used to specify a list of macro names that should be expanded. +# The macro definition that is found in the sources will be used. +# Use the PREDEFINED tag if you want to use a different macro definition. + +EXPAND_AS_DEFINED = + +# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then +# doxygen's preprocessor will remove all function-like macros that are alone +# on a line, have an all uppercase name, and do not end with a semicolon. Such +# function macros are typically used for boiler-plate code, and will confuse +# the parser if not removed. + +SKIP_FUNCTION_MACROS = YES + +#--------------------------------------------------------------------------- +# Configuration::additions related to external references +#--------------------------------------------------------------------------- + +# The TAGFILES option can be used to specify one or more tagfiles. +# Optionally an initial location of the external documentation +# can be added for each tagfile. The format of a tag file without +# this location is as follows: +# TAGFILES = file1 file2 ... 
+# Adding location for the tag files is done as follows: +# TAGFILES = file1=loc1 "file2 = loc2" ... +# where "loc1" and "loc2" can be relative or absolute paths or +# URLs. If a location is present for each tag, the installdox tool +# does not have to be run to correct the links. +# Note that each tag file must have a unique name +# (where the name does NOT include the path) +# If a tag file is not located in the directory in which doxygen +# is run, you must also specify the path to the tagfile here. + +TAGFILES = + +# When a file name is specified after GENERATE_TAGFILE, doxygen will create +# a tag file that is based on the input files it reads. + +GENERATE_TAGFILE = + +# If the ALLEXTERNALS tag is set to YES all external classes will be listed +# in the class index. If set to NO only the inherited external classes +# will be listed. + +ALLEXTERNALS = NO + +# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed +# in the modules index. If set to NO, only the current project's groups will +# be listed. + +EXTERNAL_GROUPS = YES + +# The PERL_PATH should be the absolute path and name of the perl script +# interpreter (i.e. the result of `which perl'). + +PERL_PATH = /usr/bin/perl + +#--------------------------------------------------------------------------- +# Configuration options related to the dot tool +#--------------------------------------------------------------------------- + +# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will +# generate a inheritance diagram (in HTML, RTF and LaTeX) for classes with base +# or super classes. Setting the tag to NO turns the diagrams off. Note that +# this option is superseded by the HAVE_DOT option below. This is only a +# fallback. It is recommended to install and use dot, since it yields more +# powerful graphs. 
+ +CLASS_DIAGRAMS = YES + +# If set to YES, the inheritance and collaboration graphs will hide +# inheritance and usage relations if the target is undocumented +# or is not a class. + +HIDE_UNDOC_RELATIONS = YES + +# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is +# available from the path. This tool is part of Graphviz, a graph visualization +# toolkit from AT&T and Lucent Bell Labs. The other options in this section +# have no effect if this option is set to NO (the default) + +HAVE_DOT = NO + +# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect inheritance relations. Setting this tag to YES will force the +# the CLASS_DIAGRAMS tag to NO. + +CLASS_GRAPH = YES + +# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect implementation dependencies (inheritance, containment, and +# class references variables) of the class with other documented classes. + +COLLABORATION_GRAPH = YES + +# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for groups, showing the direct groups dependencies + +GROUP_GRAPHS = YES + +# If the UML_LOOK tag is set to YES doxygen will generate inheritance and +# collaboration diagrams in a style similar to the OMG's Unified Modeling +# Language. + +UML_LOOK = NO + +# If set to YES, the inheritance and collaboration graphs will show the +# relations between templates and their instances. + +TEMPLATE_RELATIONS = NO + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT +# tags are set to YES then doxygen will generate a graph for each documented +# file showing the direct and indirect include dependencies of the file with +# other documented files. 
+ +INCLUDE_GRAPH = YES + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and +# HAVE_DOT tags are set to YES then doxygen will generate a graph for each +# documented header file showing the documented files that directly or +# indirectly include this file. + +INCLUDED_BY_GRAPH = YES + +# If the CALL_GRAPH and HAVE_DOT tags are set to YES then doxygen will +# generate a call dependency graph for every global function or class method. +# Note that enabling this option will significantly increase the time of a run. +# So in most cases it will be better to enable call graphs for selected +# functions only using the \callgraph command. + +CALL_GRAPH = NO + +# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen +# will generate a graphical hierarchy of all classes instead of a textual one. + +GRAPHICAL_HIERARCHY = YES + +# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES +# then doxygen will show the dependencies a directory has on other directories +# in a graphical way. The dependency relations are determined by the #include +# relations between the files in the directories. + +DIRECTORY_GRAPH = YES + +# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images +# generated by dot. Possible values are png, jpg, or gif +# If left blank png will be used. + +DOT_IMAGE_FORMAT = png + +# The tag DOT_PATH can be used to specify the path where the dot tool can be +# found. If left blank, it is assumed the dot tool can be found in the path. + +DOT_PATH = + +# The DOTFILE_DIRS tag can be used to specify one or more directories that +# contain dot files that are included in the documentation (see the +# \dotfile command). + +DOTFILE_DIRS = + +# The MAX_DOT_GRAPH_WIDTH tag can be used to set the maximum allowed width +# (in pixels) of the graphs generated by dot. If a graph becomes larger than +# this value, doxygen will try to truncate the graph, so that it fits within +# the specified constraint.
Beware that most browsers cannot cope with very +# large images. + +MAX_DOT_GRAPH_WIDTH = 1024 + +# The MAX_DOT_GRAPH_HEIGHT tag can be used to set the maximum allowed height +# (in pixels) of the graphs generated by dot. If a graph becomes larger than +# this value, doxygen will try to truncate the graph, so that it fits within +# the specified constraint. Beware that most browsers cannot cope with very +# large images. + +MAX_DOT_GRAPH_HEIGHT = 1024 + +# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the +# graphs generated by dot. A depth value of 3 means that only nodes reachable +# from the root by following a path via at most 3 edges will be shown. Nodes +# that lay further from the root node will be omitted. Note that setting this +# option to 1 or 2 may greatly reduce the computation time needed for large +# code bases. Also note that a graph may be further truncated if the graph's +# image dimensions are not sufficient to fit the graph (see MAX_DOT_GRAPH_WIDTH +# and MAX_DOT_GRAPH_HEIGHT). If 0 is used for the depth value (the default), +# the graph is not depth-constrained. + +MAX_DOT_GRAPH_DEPTH = 0 + +# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent +# background. This is disabled by default, which results in a white background. +# Warning: Depending on the platform used, enabling this option may lead to +# badly anti-aliased labels on the edges of a graph (i.e. they become hard to +# read). + +DOT_TRANSPARENT = NO + +# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output +# files in one run (i.e. multiple -o and -T options on the command line). This +# makes dot run faster, but since only newer versions of dot (>1.8.10) +# support this, this feature is disabled by default. + +DOT_MULTI_TARGETS = NO + +# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will +# generate a legend page explaining the meaning of the various boxes and +# arrows in the dot generated graphs.
+ +GENERATE_LEGEND = YES + +# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will +# remove the intermediate dot files that are used to generate +# the various graphs. + +DOT_CLEANUP = YES + +#--------------------------------------------------------------------------- +# Configuration::additions related to the search engine +#--------------------------------------------------------------------------- + +# The SEARCHENGINE tag specifies whether or not a search engine should be +# used. If set to NO the values of all tags below this one will be ignored. + +SEARCHENGINE = NO diff --git a/maintenance/mysql5/tables.sql b/maintenance/mysql5/tables.sql new file mode 100644 index 00000000..cc6818d3 --- /dev/null +++ b/maintenance/mysql5/tables.sql @@ -0,0 +1,1009 @@ +-- Experimental table definitions for MySQL 4.1 and 5.0 with +-- explicit character set support. Not fully tested, may have +-- surprises! +-- +-- TODO: Test various fields +-- TODO: Anything else need to be moved to VARBINARY and BLOB? +-- TODO: UCS-2 better than UTF-8? +-- TODO: Find out how to get 4-byte UTF-8 chars into MySQL... +-- An alternate UCS-2 that does UTF-16 conversion would work. +-- TODO: Work on collation usage + +-- ------------------------------------------------------------ + +-- SQL to create the initial tables for the MediaWiki database. +-- This is read and executed by the install script; you should +-- not have to run it by itself unless doing a manual install. + +-- +-- General notes: +-- +-- If possible, create tables as InnoDB to benefit from the +-- superior resiliency against crashes and ability to read +-- during writes (and write during reads!) +-- +-- Only the 'searchindex' table requires MyISAM due to the +-- requirement for fulltext index support, which is missing +-- from InnoDB. +-- +-- +-- The MySQL table backend for MediaWiki currently uses +-- 14-character CHAR or VARCHAR fields to store timestamps. 
+-- The format is YYYYMMDDHHMMSS, which is derived from the +-- text format of MySQL's TIMESTAMP fields. +-- +-- Historically TIMESTAMP fields were used, but abandoned +-- in early 2002 after a lot of trouble with the fields +-- auto-updating. +-- +-- The PostgreSQL backend uses DATETIME fields for timestamps, +-- and we will migrate the MySQL definitions at some point as +-- well. +-- +-- +-- The /*$wgDBprefix*/ comments in this and other files are +-- replaced with the defined table prefix by the installer +-- and updater scripts. If you are installing or running +-- updates manually, you will need to manually insert the +-- table prefix if any when running these scripts. +-- + + +-- +-- The user table contains basic account information, +-- authentication keys, etc. +-- +-- Some multi-wiki sites may share a single central user table +-- between separate wikis using the $wgSharedDB setting. +-- +-- Note that when an external authentication plugin is used, +-- user table entries still need to be created to store +-- preferences and to key tracking information in the other +-- tables. +-- +CREATE TABLE /*$wgDBprefix*/user ( + user_id int(5) unsigned NOT NULL auto_increment, + + -- Usernames must be unique, must not be in the form of + -- an IP address. _Shouldn't_ allow slashes or case + -- conflicts. Spaces are allowed, and are _not_ converted + -- to underscores like titles. See User::newFromName() for + -- the specific tests that usernames have to pass. + user_name varchar(255) binary NOT NULL default '', + + -- Optional 'real name' to be displayed in credit listings + user_real_name varchar(255) binary NOT NULL default '', + + -- Password hashes, normally hashed like so: + -- MD5(CONCAT(user_id,'-',MD5(plaintext_password))), see + -- wfEncryptPassword() in GlobalFunctions.php + user_password tinyblob NOT NULL default '', + + -- When using 'mail me a new password', a random + -- password is generated and the hash stored here. 
+ -- The previous password is left in place until + -- someone actually logs in with the new password, + -- at which point the hash is moved to user_password + -- and the old password is invalidated. + user_newpassword tinyblob NOT NULL default '', + + -- Note: email should be restricted, not public info. + -- Same with passwords. + user_email tinytext NOT NULL default '', + + -- Newline-separated list of name=value defining the user + -- preferences + user_options blob NOT NULL default '', + + -- This is a timestamp which is updated when a user + -- logs in, logs out, changes preferences, or performs + -- some other action requiring HTML cache invalidation + -- to ensure that the UI is updated. + user_touched char(14) binary NOT NULL default '', + + -- A pseudorandomly generated value that is stored in + -- a cookie when the "remember password" feature is + -- used (previously, a hash of the password was used, but + -- this was vulnerable to cookie-stealing attacks) + user_token char(32) binary NOT NULL default '', + + -- Initially NULL; when a user's e-mail address has been + -- validated by returning with a mailed token, this is + -- set to the current timestamp. + user_email_authenticated CHAR(14) BINARY, + + -- Randomly generated token created when the e-mail address + -- is set and a confirmation test mail sent. + user_email_token CHAR(32) BINARY, + + -- Expiration date for the user_email_token + user_email_token_expires CHAR(14) BINARY, + + -- Timestamp of account registration. + -- Accounts predating this schema addition may contain NULL. + user_registration CHAR(14) BINARY, + + PRIMARY KEY user_id (user_id), + UNIQUE INDEX user_name (user_name), + INDEX (user_email_token) + +) TYPE=InnoDB, DEFAULT CHARSET=utf8; + +-- +-- User permissions have been broken out to a separate table; +-- this allows sites with a shared user table to have different +-- permissions assigned to a user in each project. 
+-- +-- This table replaces the old user_rights field which used a +-- comma-separated blob. +-- +CREATE TABLE /*$wgDBprefix*/user_groups ( + -- Key to user_id + ug_user int(5) unsigned NOT NULL default '0', + + -- Group names are short symbolic string keys. + -- The set of group names is open-ended, though in practice + -- only some predefined ones are likely to be used. + -- + -- At runtime $wgGroupPermissions will associate group keys + -- with particular permissions. A user will have the combined + -- permissions of any group they're explicitly in, plus + -- the implicit '*' and 'user' groups. + ug_group char(16) NOT NULL default '', + + PRIMARY KEY (ug_user,ug_group), + KEY (ug_group) +) TYPE=InnoDB, DEFAULT CHARSET=utf8; + +-- Stores notifications of user talk page changes, for the display +-- of the "you have new messages" box +CREATE TABLE /*$wgDBprefix*/user_newtalk ( + -- Key to user.user_id + user_id int(5) NOT NULL default '0', + -- If the user is an anonymous user hir IP address is stored here + -- since the user_id of 0 is ambiguous + user_ip varchar(40) NOT NULL default '', + INDEX user_id (user_id), + INDEX user_ip (user_ip) +) TYPE=InnoDB, DEFAULT CHARSET=utf8; + + +-- +-- Core of the wiki: each page has an entry here which identifies +-- it by title and contains some essential metadata. +-- +CREATE TABLE /*$wgDBprefix*/page ( + -- Unique identifier number. The page_id will be preserved across + -- edits and rename operations, but not deletions and recreations. + page_id int(8) unsigned NOT NULL auto_increment, + + -- A page name is broken into a namespace and a title. + -- The namespace keys are UI-language-independent constants, + -- defined in includes/Defines.php + page_namespace int NOT NULL, + + -- The rest of the title, as text. + -- Spaces are transformed into underscores in title storage. + page_title varchar(255) binary NOT NULL, + + -- Comma-separated set of permission keys indicating who + -- can move or edit the page. 
+ page_restrictions tinyblob NOT NULL default '', + + -- Number of times this page has been viewed. + page_counter bigint(20) unsigned NOT NULL default '0', + + -- 1 indicates the article is a redirect. + page_is_redirect tinyint(1) unsigned NOT NULL default '0', + + -- 1 indicates this is a new entry, with only one edit. + -- Not all pages with one edit are new pages. + page_is_new tinyint(1) unsigned NOT NULL default '0', + + -- Random value between 0 and 1, used for Special:Randompage + page_random real unsigned NOT NULL, + + -- This timestamp is updated whenever the page changes in + -- a way requiring it to be re-rendered, invalidating caches. + -- Aside from editing this includes permission changes, + -- creation or deletion of linked pages, and alteration + -- of contained templates. + page_touched char(14) binary NOT NULL default '', + + -- Handy key to revision.rev_id of the current revision. + -- This may be 0 during page creation, but that shouldn't + -- happen outside of a transaction... hopefully. + page_latest int(8) unsigned NOT NULL, + + -- Uncompressed length in bytes of the page's current source text. + page_len int(8) unsigned NOT NULL, + + PRIMARY KEY page_id (page_id), + UNIQUE INDEX name_title (page_namespace,page_title), + + -- Special-purpose indexes + INDEX (page_random), + INDEX (page_len) + +) TYPE=InnoDB, DEFAULT CHARSET=utf8; + +-- +-- Every edit of a page creates also a revision row. +-- This stores metadata about the revision, and a reference +-- to the text storage backend. +-- +CREATE TABLE /*$wgDBprefix*/revision ( + rev_id int(8) unsigned NOT NULL auto_increment, + + -- Key to page_id. This should _never_ be invalid. + rev_page int(8) unsigned NOT NULL, + + -- Key to text.old_id, where the actual bulk text is stored. + -- It's possible for multiple revisions to use the same text, + -- for instance revisions where only metadata is altered + -- or a rollback to a previous version. 
+ rev_text_id int(8) unsigned NOT NULL, + + -- Text comment summarizing the change. + -- This text is shown in the history and other changes lists, + -- rendered in a subset of wiki markup by Linker::formatComment() + rev_comment tinyblob NOT NULL default '', + + -- Key to user.user_id of the user who made this edit. + -- Stores 0 for anonymous edits and for some mass imports. + rev_user int(5) unsigned NOT NULL default '0', + + -- Text username or IP address of the editor. + rev_user_text varchar(255) binary NOT NULL default '', + + -- Timestamp + rev_timestamp char(14) binary NOT NULL default '', + + -- Records whether the user marked the 'minor edit' checkbox. + -- Many automated edits are marked as minor. + rev_minor_edit tinyint(1) unsigned NOT NULL default '0', + + -- Not yet used; reserved for future changes to the deletion system. + rev_deleted tinyint(1) unsigned NOT NULL default '0', + + PRIMARY KEY rev_page_id (rev_page, rev_id), + UNIQUE INDEX rev_id (rev_id), + INDEX rev_timestamp (rev_timestamp), + INDEX page_timestamp (rev_page,rev_timestamp), + INDEX user_timestamp (rev_user,rev_timestamp), + INDEX usertext_timestamp (rev_user_text,rev_timestamp) + +) TYPE=InnoDB, DEFAULT CHARSET=utf8; + + +-- +-- Holds text of individual page revisions. +-- +-- Field names are a holdover from the 'old' revisions table in +-- MediaWiki 1.4 and earlier: an upgrade will transform that +-- table into the 'text' table to minimize unnecessary churning +-- and downtime. If upgrading, the other fields will be left unused. +-- +CREATE TABLE /*$wgDBprefix*/text ( + -- Unique text storage key number. + -- Note that the 'oldid' parameter used in URLs does *not* + -- refer to this number anymore, but to rev_id. + -- + -- revision.rev_text_id is a key to this column + old_id int(8) unsigned NOT NULL auto_increment, + + -- Depending on the contents of the old_flags field, the text + -- may be convenient plain text, or it may be funkily encoded. 
+ old_text mediumblob NOT NULL default '', + + -- Comma-separated list of flags: + -- gzip: text is compressed with PHP's gzdeflate() function. + -- utf8: text was stored as UTF-8. + -- If $wgLegacyEncoding option is on, rows *without* this flag + -- will be converted to UTF-8 transparently at load time. + -- object: text field contained a serialized PHP object. + -- The object either contains multiple versions compressed + -- together to achieve a better compression ratio, or it refers + -- to another row where the text can be found. + old_flags tinyblob NOT NULL default '', + + PRIMARY KEY old_id (old_id) + +) TYPE=InnoDB, DEFAULT CHARSET=utf8; + +-- +-- Holding area for deleted articles, which may be viewed +-- or restored by admins through the Special:Undelete interface. +-- The fields generally correspond to the page, revision, and text +-- fields, with several caveats. +-- +CREATE TABLE /*$wgDBprefix*/archive ( + ar_namespace int NOT NULL default '0', + ar_title varchar(255) binary NOT NULL default '', + + -- Newly deleted pages will not store text in this table, + -- but will reference the separately existing text rows. + -- This field is retained for backwards compatibility, + -- so old archived pages will remain accessible after + -- upgrading from 1.4 to 1.5. + -- Text may be gzipped or otherwise funky. + ar_text mediumblob NOT NULL default '', + + -- Basic revision stuff... + ar_comment tinyblob NOT NULL default '', + ar_user int(5) unsigned NOT NULL default '0', + ar_user_text varchar(255) binary NOT NULL, + ar_timestamp char(14) binary NOT NULL default '', + ar_minor_edit tinyint(1) NOT NULL default '0', + + -- See ar_text note. + ar_flags tinyblob NOT NULL default '', + + -- When revisions are deleted, their unique rev_id is stored + -- here so it can be retained after undeletion. This is necessary + -- to retain permalinks to given revisions after accidental delete + -- cycles or messy operations like history merges. 
+ -- + -- Old entries from 1.4 will be NULL here, and a new rev_id will + -- be created on undeletion for those revisions. + ar_rev_id int(8) unsigned, + + -- For newly deleted revisions, this is the text.old_id key to the + -- actual stored text. To avoid breaking the block-compression scheme + -- and otherwise making storage changes harder, the actual text is + -- *not* deleted from the text table, merely hidden by removal of the + -- page and revision entries. + -- + -- Old entries deleted under 1.2-1.4 will have NULL here, and their + -- ar_text and ar_flags fields will be used to create a new text + -- row upon undeletion. + ar_text_id int(8) unsigned, + + KEY name_title_timestamp (ar_namespace,ar_title,ar_timestamp) + +) TYPE=InnoDB, DEFAULT CHARSET=utf8; + + +-- +-- Track page-to-page hyperlinks within the wiki. +-- +CREATE TABLE /*$wgDBprefix*/pagelinks ( + -- Key to the page_id of the page containing the link. + pl_from int(8) unsigned NOT NULL default '0', + + -- Key to page_namespace/page_title of the target page. + -- The target page may or may not exist, and due to renames + -- and deletions may refer to different page records as time + -- goes by. + pl_namespace int NOT NULL default '0', + pl_title varchar(255) binary NOT NULL default '', + + UNIQUE KEY pl_from(pl_from,pl_namespace,pl_title), + KEY (pl_namespace,pl_title) + +) TYPE=InnoDB, DEFAULT CHARSET=utf8; + + +-- +-- Track template inclusions. +-- +CREATE TABLE /*$wgDBprefix*/templatelinks ( + -- Key to the page_id of the page containing the link. + tl_from int(8) unsigned NOT NULL default '0', + + -- Key to page_namespace/page_title of the target page. + -- The target page may or may not exist, and due to renames + -- and deletions may refer to different page records as time + -- goes by. 
+ tl_namespace int NOT NULL default '0', + tl_title varchar(255) binary NOT NULL default '', + + UNIQUE KEY tl_from(tl_from,tl_namespace,tl_title), + KEY (tl_namespace,tl_title) + +) TYPE=InnoDB, DEFAULT CHARSET=utf8; + + +-- +-- Track links to images *used inline* +-- We don't distinguish live from broken links here, so +-- they do not need to be changed on upload/removal. +-- +CREATE TABLE /*$wgDBprefix*/imagelinks ( + -- Key to page_id of the page containing the image / media link. + il_from int(8) unsigned NOT NULL default '0', + + -- Filename of target image. + -- This is also the page_title of the file's description page; + -- all such pages are in namespace 6 (NS_IMAGE). + il_to varchar(255) binary NOT NULL default '', + + UNIQUE KEY il_from(il_from,il_to), + KEY (il_to) + +) TYPE=InnoDB, DEFAULT CHARSET=utf8; + +-- +-- Track category inclusions *used inline* +-- This tracks a single level of category membership +-- (folksonomic tagging, really). +-- +CREATE TABLE /*$wgDBprefix*/categorylinks ( + -- Key to page_id of the page defined as a category member. + cl_from int(8) unsigned NOT NULL default '0', + + -- Name of the category. + -- This is also the page_title of the category's description page; + -- all such pages are in namespace 14 (NS_CATEGORY). + cl_to varchar(255) binary NOT NULL default '', + + -- The title of the linking page, or an optional override + -- to determine sort order. Sorting is by binary order, which + -- isn't always ideal, but collations seem to be an exciting + -- and dangerous new world in MySQL... + -- + -- For MySQL 4.1+ with charset set to utf8, the sort key *index* + -- needs to be cut to be smaller than 1024 bytes (at 3 bytes per char). + -- To sort properly on the shorter key, this field needs to be + -- just as short. + cl_sortkey varchar(86) binary NOT NULL default '', + + -- This isn't really used at present. Provided for an optional + -- sorting method by approximate addition time. 
+ cl_timestamp timestamp NOT NULL, + + UNIQUE KEY cl_from(cl_from,cl_to), + + -- We always sort within a given category... + KEY cl_sortkey(cl_to,cl_sortkey), + + -- Not really used? + KEY cl_timestamp(cl_to,cl_timestamp) + +) TYPE=InnoDB, DEFAULT CHARSET=utf8; + +-- +-- Track links to external URLs +-- +CREATE TABLE /*$wgDBprefix*/externallinks ( + -- page_id of the referring page + el_from int(8) unsigned NOT NULL default '0', + + -- The URL + el_to blob NOT NULL default '', + + -- In the case of HTTP URLs, this is the URL with any username or password + -- removed, and with the labels in the hostname reversed and converted to + -- lower case. An extra dot is added to allow for matching of either + -- example.com or *.example.com in a single scan. + -- Example: + -- http://user:password@sub.example.com/page.html + -- becomes + -- http://com.example.sub./page.html + -- which allows for fast searching for all pages under example.com with the + -- clause: + -- WHERE el_index LIKE 'http://com.example.%' + el_index blob NOT NULL default '', + + KEY (el_from, el_to(40)), + KEY (el_to(60), el_from), + KEY (el_index(60)) +) TYPE=InnoDB, DEFAULT CHARSET=utf8; + +-- +-- Track interlanguage links +-- +CREATE TABLE /*$wgDBprefix*/langlinks ( + -- page_id of the referring page + ll_from int(8) unsigned NOT NULL default '0', + + -- Language code of the target + ll_lang varchar(10) binary NOT NULL default '', + + -- Title of the target, including namespace + ll_title varchar(255) binary NOT NULL default '', + + UNIQUE KEY (ll_from, ll_lang), + KEY (ll_lang, ll_title) +) ENGINE=InnoDB, DEFAULT CHARSET=utf8; + +-- +-- Contains a single row with some aggregate info +-- on the state of the site. +-- +CREATE TABLE /*$wgDBprefix*/site_stats ( + -- The single row should contain 1 here. + ss_row_id int(8) unsigned NOT NULL, + + -- Total number of page views, if hit counters are enabled. + ss_total_views bigint(20) unsigned default '0', + + -- Total number of edits performed. 
+ ss_total_edits bigint(20) unsigned default '0', + + -- An approximate count of pages matching the following criteria: + -- * in namespace 0 + -- * not a redirect + -- * contains the text '[[' + -- See Article::isCountable() in includes/Article.php + ss_good_articles bigint(20) unsigned default '0', + + -- Total pages, theoretically equal to SELECT COUNT(*) FROM page; except faster + ss_total_pages bigint(20) default '-1', + + -- Number of users, theoretically equal to SELECT COUNT(*) FROM user; + ss_users bigint(20) default '-1', + + -- Deprecated, no longer updated as of 1.5 + ss_admins int(10) default '-1', + + -- Number of images, equivalent to SELECT COUNT(*) FROM image + ss_images int(10) default '0', + + UNIQUE KEY ss_row_id (ss_row_id) + +) TYPE=InnoDB; + +-- +-- Stores an ID for every time any article is visited; +-- depending on $wgHitcounterUpdateFreq, it is +-- periodically cleared and the page_counter column +-- in the page table updated for all the articles +-- that have been visited. +-- +CREATE TABLE /*$wgDBprefix*/hitcounter ( + hc_id INTEGER UNSIGNED NOT NULL +) TYPE=HEAP MAX_ROWS=25000; + + +-- +-- The internet is full of jerks, alas. Sometimes it's handy +-- to block a vandal or troll account. +-- +CREATE TABLE /*$wgDBprefix*/ipblocks ( + -- Primary key, introduced for privacy. + ipb_id int(8) NOT NULL auto_increment, + + -- Blocked IP address in dotted-quad form or user name. + ipb_address varchar(40) binary NOT NULL default '', + + -- Blocked user ID or 0 for IP blocks. + ipb_user int(8) unsigned NOT NULL default '0', + + -- User ID who made the block. + ipb_by int(8) unsigned NOT NULL default '0', + + -- Text comment made by blocker. + ipb_reason tinyblob NOT NULL default '', + + -- Creation (or refresh) date in standard YMDHMS form. + -- IP blocks expire automatically. + ipb_timestamp char(14) binary NOT NULL default '', + + -- Indicates that the IP address was banned because a banned + -- user accessed a page through it. 
If this is 1, ipb_address + -- will be hidden, and the block identified by block ID number. + ipb_auto tinyint(1) NOT NULL default '0', + + -- Time at which the block will expire. + ipb_expiry char(14) binary NOT NULL default '', + + -- Start and end of an address range, in hexadecimal + -- Size chosen to allow IPv6 + ipb_range_start varchar(32) NOT NULL default '', + ipb_range_end varchar(32) NOT NULL default '', + + PRIMARY KEY ipb_id (ipb_id), + INDEX ipb_address (ipb_address), + INDEX ipb_user (ipb_user), + INDEX ipb_range (ipb_range_start(8), ipb_range_end(8)) + +) TYPE=InnoDB, DEFAULT CHARSET=utf8; + + +-- +-- Uploaded images and other files. +-- +CREATE TABLE /*$wgDBprefix*/image ( + -- Filename. + -- This is also the title of the associated description page, + -- which will be in namespace 6 (NS_IMAGE). + img_name varchar(255) binary NOT NULL default '', + + -- File size in bytes. + img_size int(8) unsigned NOT NULL default '0', + + -- For images, size in pixels. + img_width int(5) NOT NULL default '0', + img_height int(5) NOT NULL default '0', + + -- Extracted EXIF metadata stored as a serialized PHP array. + img_metadata mediumblob NOT NULL, + + -- For images, bits per pixel if known. 
+ img_bits int(3) NOT NULL default '0', + + -- Media type as defined by the MEDIATYPE_xxx constants + img_media_type ENUM("UNKNOWN", "BITMAP", "DRAWING", "AUDIO", "VIDEO", "MULTIMEDIA", "OFFICE", "TEXT", "EXECUTABLE", "ARCHIVE") default NULL, + + -- major part of a MIME media type as defined by IANA + -- see http://www.iana.org/assignments/media-types/ + img_major_mime ENUM("unknown", "application", "audio", "image", "text", "video", "message", "model", "multipart") NOT NULL default "unknown", + + -- minor part of a MIME media type as defined by IANA + -- the minor parts are not required to adhere to any standard + -- but should be consistent throughout the database + -- see http://www.iana.org/assignments/media-types/ + img_minor_mime varchar(32) NOT NULL default "unknown", + + -- Description field as entered by the uploader. + -- This is displayed in image upload history and logs. + img_description tinyblob NOT NULL default '', + + -- user_id and user_name of uploader. + img_user int(5) unsigned NOT NULL default '0', + img_user_text varchar(255) binary NOT NULL default '', + + -- Time of the upload. + img_timestamp char(14) binary NOT NULL default '', + + PRIMARY KEY img_name (img_name), + + -- Used by Special:Imagelist for sort-by-size + INDEX img_size (img_size), + + -- Used by Special:Newimages and Special:Imagelist + INDEX img_timestamp (img_timestamp) + +) TYPE=InnoDB, DEFAULT CHARSET=utf8; + +-- +-- Previous revisions of uploaded files. +-- Awkwardly, image rows have to be moved into +-- this table at re-upload time. +-- +CREATE TABLE /*$wgDBprefix*/oldimage ( + -- Base filename: key to image.img_name + oi_name varchar(255) binary NOT NULL default '', + + -- Filename of the archived file. + -- This is generally a timestamp and '!' prepended to the base name. + oi_archive_name varchar(255) binary NOT NULL default '', + + -- Other fields as in image... 
+ oi_size int(8) unsigned NOT NULL default 0, + oi_width int(5) NOT NULL default 0, + oi_height int(5) NOT NULL default 0, + oi_bits int(3) NOT NULL default 0, + oi_description tinyblob NOT NULL default '', + oi_user int(5) unsigned NOT NULL default '0', + oi_user_text varchar(255) binary NOT NULL default '', + oi_timestamp char(14) binary NOT NULL default '', + + INDEX oi_name (oi_name(10)) + +) TYPE=InnoDB, DEFAULT CHARSET=utf8; + + +-- +-- Record of deleted file data +-- +CREATE TABLE /*$wgDBprefix*/filearchive ( + -- Unique row id + fa_id int not null auto_increment, + + -- Original base filename; key to image.img_name, page.page_title, etc + fa_name varchar(255) binary NOT NULL default '', + + -- Filename of archived file, if an old revision + fa_archive_name varchar(255) binary default '', + + -- Which storage bin (directory tree or object store) the file data + -- is stored in. Should be 'deleted' for files that have been deleted; + -- any other bin is not yet in use. + fa_storage_group varchar(16), + + -- SHA-1 of the file contents plus extension, used as a key for storage. + -- eg 8f8a562add37052a1848ff7771a2c515db94baa9.jpg + -- + -- If NULL, the file was missing at deletion time or has been purged + -- from the archival storage. + fa_storage_key varchar(64) binary default '', + + -- Deletion information, if this file is deleted. 
+ fa_deleted_user int, + fa_deleted_timestamp char(14) binary default '', + fa_deleted_reason text, + + -- Duped fields from image + fa_size int(8) unsigned default '0', + fa_width int(5) default '0', + fa_height int(5) default '0', + fa_metadata mediumblob, + fa_bits int(3) default '0', + fa_media_type ENUM("UNKNOWN", "BITMAP", "DRAWING", "AUDIO", "VIDEO", "MULTIMEDIA", "OFFICE", "TEXT", "EXECUTABLE", "ARCHIVE") default NULL, + fa_major_mime ENUM("unknown", "application", "audio", "image", "text", "video", "message", "model", "multipart") default "unknown", + fa_minor_mime varchar(32) default "unknown", + fa_description tinyblob default '', + fa_user int(5) unsigned default '0', + fa_user_text varchar(255) binary default '', + fa_timestamp char(14) binary default '', + + PRIMARY KEY (fa_id), + INDEX (fa_name, fa_timestamp), -- pick out by image name + INDEX (fa_storage_group, fa_storage_key), -- pick out dupe files + INDEX (fa_deleted_timestamp), -- sort by deletion time + INDEX (fa_deleted_user) -- sort by deleter + +) TYPE=InnoDB, DEFAULT CHARSET=utf8; + +-- +-- Primarily a summary table for Special:Recentchanges, +-- this table contains some additional info on edits from +-- the last few days, see Article::editUpdates() +-- +CREATE TABLE /*$wgDBprefix*/recentchanges ( + rc_id int(8) NOT NULL auto_increment, + rc_timestamp varchar(14) binary NOT NULL default '', + rc_cur_time varchar(14) binary NOT NULL default '', + + -- As in revision + rc_user int(10) unsigned NOT NULL default '0', + rc_user_text varchar(255) binary NOT NULL default '', + + -- When pages are renamed, their RC entries do _not_ change. + rc_namespace int NOT NULL default '0', + rc_title varchar(255) binary NOT NULL default '', + + -- as in revision... + rc_comment varchar(255) binary NOT NULL default '', + rc_minor tinyint(3) unsigned NOT NULL default '0', + + -- Edits by user accounts with the 'bot' rights key are + -- marked with a 1 here, and will be hidden from the + -- default view. 
+ rc_bot tinyint(3) unsigned NOT NULL default '0', + + rc_new tinyint(3) unsigned NOT NULL default '0', + + -- Key to page_id (was cur_id prior to 1.5). + -- This will keep links working after moves while + -- retaining the at-the-time name in the changes list. + rc_cur_id int(10) unsigned NOT NULL default '0', + + -- rev_id of the given revision + rc_this_oldid int(10) unsigned NOT NULL default '0', + + -- rev_id of the prior revision, for generating diff links. + rc_last_oldid int(10) unsigned NOT NULL default '0', + + -- These may no longer be used, with the new move log. + rc_type tinyint(3) unsigned NOT NULL default '0', + rc_moved_to_ns tinyint(3) unsigned NOT NULL default '0', + rc_moved_to_title varchar(255) binary NOT NULL default '', + + -- If the Recent Changes Patrol option is enabled, + -- users may mark edits as having been reviewed to + -- remove a warning flag on the RC list. + -- A value of 1 indicates the page has been reviewed. + rc_patrolled tinyint(3) unsigned NOT NULL default '0', + + -- Recorded IP address the edit was made from, if the + -- $wgPutIPinRC option is enabled. + rc_ip char(15) NOT NULL default '', + + PRIMARY KEY rc_id (rc_id), + INDEX rc_timestamp (rc_timestamp), + INDEX rc_namespace_title (rc_namespace, rc_title), + INDEX rc_cur_id (rc_cur_id), + INDEX new_name_timestamp(rc_new,rc_namespace,rc_timestamp), + INDEX rc_ip (rc_ip) + +) TYPE=InnoDB, DEFAULT CHARSET=utf8; + +CREATE TABLE /*$wgDBprefix*/watchlist ( + -- Key to user.user_id + wl_user int(5) unsigned NOT NULL, + + -- Key to page_namespace/page_title + -- Note that users may watch pages which do not exist yet, + -- or existed in the past but have been deleted. + wl_namespace int NOT NULL default '0', + wl_title varchar(255) binary NOT NULL default '', + + -- Timestamp when user was last sent a notification e-mail; + -- cleared when the user visits the page. 
+ wl_notificationtimestamp varchar(14) binary, + + UNIQUE KEY (wl_user, wl_namespace, wl_title), + KEY namespace_title (wl_namespace,wl_title) + +) TYPE=InnoDB, DEFAULT CHARSET=utf8; + + +-- +-- Used by the math module to keep track +-- of previously-rendered items. +-- +CREATE TABLE /*$wgDBprefix*/math ( + -- Binary MD5 hash of the latex fragment, used as an identifier key. + math_inputhash varbinary(16) NOT NULL, + + -- Not sure what this is, exactly... + math_outputhash varbinary(16) NOT NULL, + + -- texvc reports how well it thinks the HTML conversion worked; + -- if it's a low level the PNG rendering may be preferred. + math_html_conservativeness tinyint(1) NOT NULL, + + -- HTML output from texvc, if any + math_html text, + + -- MathML output from texvc, if any + math_mathml text, + + UNIQUE KEY math_inputhash (math_inputhash) + +) TYPE=InnoDB, DEFAULT CHARSET=utf8; + +-- +-- When using the default MySQL search backend, page titles +-- and text are munged to strip markup, do Unicode case folding, +-- and prepare the result for MySQL's fulltext index. +-- +-- This table must be MyISAM; InnoDB does not support the needed +-- fulltext index. +-- +CREATE TABLE /*$wgDBprefix*/searchindex ( + -- Key to page_id + si_page int(8) unsigned NOT NULL, + + -- Munged version of title + si_title varchar(255) NOT NULL default '', + + -- Munged version of body text + si_text mediumtext NOT NULL default '', + + UNIQUE KEY (si_page), + FULLTEXT si_title (si_title), + FULLTEXT si_text (si_text) + +) TYPE=MyISAM, DEFAULT CHARSET=utf8; + +-- +-- Recognized interwiki link prefixes +-- +CREATE TABLE /*$wgDBprefix*/interwiki ( + -- The interwiki prefix, (e.g. "Meatball", or the language prefix "de") + iw_prefix char(32) NOT NULL, + + -- The URL of the wiki, with "$1" as a placeholder for an article name. + -- Any spaces in the name will be transformed to underscores before + -- insertion. 
+ iw_url char(127) NOT NULL, + + -- A boolean value indicating whether the wiki is in this project + -- (used, for example, to detect redirect loops) + iw_local BOOL NOT NULL, + + -- Boolean value indicating whether interwiki transclusions are allowed. + iw_trans TINYINT(1) NOT NULL DEFAULT 0, + + UNIQUE KEY iw_prefix (iw_prefix) + +) TYPE=InnoDB, DEFAULT CHARSET=utf8; + +-- +-- Used for caching expensive grouped queries +-- +CREATE TABLE /*$wgDBprefix*/querycache ( + -- A key name, generally the base name of the special page. + qc_type char(32) NOT NULL, + + -- Some sort of stored value. Sizes, counts... + qc_value int(5) unsigned NOT NULL default '0', + + -- Target namespace+title + qc_namespace int NOT NULL default '0', + qc_title char(255) binary NOT NULL default '', + + KEY (qc_type,qc_value) + +) TYPE=InnoDB, DEFAULT CHARSET=utf8; + +-- +-- For a few generic cache operations if not using Memcached +-- +CREATE TABLE /*$wgDBprefix*/objectcache ( + keyname char(255) binary not null default '', + value mediumblob, + exptime datetime, + unique key (keyname), + key (exptime) + +) TYPE=InnoDB, DEFAULT CHARSET=utf8; + +-- +-- Cache of interwiki transclusion +-- +CREATE TABLE /*$wgDBprefix*/transcache ( + tc_url VARCHAR(255) NOT NULL, + tc_contents TEXT, + tc_time INT NOT NULL, + UNIQUE INDEX tc_url_idx(tc_url) +) TYPE=InnoDB, DEFAULT CHARSET=utf8; + +CREATE TABLE /*$wgDBprefix*/logging ( + -- Symbolic keys for the general log type and the action type + -- within the log. The output format will be controlled by the + -- action field, but only the type controls categorization. + log_type char(10) NOT NULL default '', + log_action char(10) NOT NULL default '', + + -- Timestamp. Duh. + log_timestamp char(14) NOT NULL default '19700101000000', + + -- The user who performed this action; key to user_id + log_user int unsigned NOT NULL default 0, + + -- Key to the page affected. Where a user is the target, + -- this will point to the user page. 
+ log_namespace int NOT NULL default 0, + log_title varchar(255) binary NOT NULL default '', + + -- Freeform text. Interpreted as edit history comments. + log_comment varchar(255) NOT NULL default '', + + -- LF separated list of miscellaneous parameters + log_params blob NOT NULL default '', + + KEY type_time (log_type, log_timestamp), + KEY user_time (log_user, log_timestamp), + KEY page_time (log_namespace, log_title, log_timestamp) + +) TYPE=InnoDB, DEFAULT CHARSET=utf8; + +CREATE TABLE /*$wgDBprefix*/trackbacks ( + tb_id integer AUTO_INCREMENT PRIMARY KEY, + tb_page integer REFERENCES page(page_id) ON DELETE CASCADE, + tb_title varchar(255) NOT NULL, + tb_url varchar(255) NOT NULL, + tb_ex text, + tb_name varchar(255), + + INDEX (tb_page) +) TYPE=InnoDB, DEFAULT CHARSET=utf8; + +-- Jobs performed by parallel apache threads or a command-line daemon +CREATE TABLE /*$wgDBprefix*/job ( + job_id int(9) unsigned NOT NULL auto_increment, + + -- Command name, currently only refreshLinks is defined + job_cmd varchar(255) NOT NULL default '', + + -- Namespace and title to act on + -- Should be 0 and '' if the command does not operate on a title + job_namespace int NOT NULL, + job_title varchar(255) binary NOT NULL, + + -- Any other parameters to the command + -- Presently unused, format undefined + job_params blob NOT NULL default '', + + PRIMARY KEY job_id (job_id), + KEY (job_cmd, job_namespace, job_title) +) TYPE=InnoDB, DEFAULT CHARSET=utf8; + +-- Details of updates to cached special pages +CREATE TABLE /*$wgDBprefix*/querycache_info ( + + -- Special page name + -- Corresponds to a qc_type value + qci_type varchar(32) NOT NULL default '', + + -- Timestamp of last update + qci_timestamp char(14) NOT NULL default '19700101000000', + + UNIQUE KEY ( qci_type ) + +) TYPE=InnoDB;
\ No newline at end of file
diff --git a/maintenance/namespace2sql.php b/maintenance/namespace2sql.php
new file mode 100644
index 00000000..8084bfec
--- /dev/null
+++ b/maintenance/namespace2sql.php
@@ -0,0 +1,14 @@
+<?php
+#
+# Emit one SQL INSERT statement per namespace index so that the
+# namespace names can be loaded into the ns_name table.
+# This source code is in the public domain.
+
+require_once( "commandLine.inc" );
+
+# Namespace indexes -2 through 15 inclusive, as in the original loop.
+foreach ( range( -2, 15 ) as $i ) {
+	# Both values are escaped before being placed inside SQL quotes.
+	$dbname = wfStrencode( $wgDBname );
+	$nsname = wfStrencode( $wgLang->getNsText( $i ) );
+	echo "INSERT INTO ns_name(ns_db, ns_num, ns_name) VALUES('$dbname', $i, '$nsname');\n";
+}
+
+?>
diff --git a/maintenance/namespaceDupes.php b/maintenance/namespaceDupes.php
new file mode 100644
index 00000000..ad56eee7
--- /dev/null
+++ b/maintenance/namespaceDupes.php
@@ -0,0 +1,194 @@
+<?php
+# Copyright (C) 2005 Brion Vibber <brion@pobox.com>
+# http://www.mediawiki.org/
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+# http://www.gnu.org/copyleft/gpl.html
+
+$options = array( 'fix', 'suffix', 'help' );
+
+/** */
+require_once( 'commandLine.inc' );
+#require_once( 'maintenance/userDupes.inc' );
+
+if(isset( $options['help'] ) ) {
+print <<<END
+usage: namespaceDupes.php [--fix] [--suffix=<text>] [--help]
+ --help : this help message
+ --fix : attempt to automatically fix errors
+ --suffix=<text> : dupes will be renamed with correct namespace with <text>
+ appended after the article name.
+
+END;
+die;
+}
+
+/**
+ * Finds main-namespace pages whose titles start with "<Namespace name>:"
+ * and, on request, moves them into that namespace.
+ */
+class NamespaceConflictChecker {
+	/**
+	 * @param Database $db connection used for both the SELECTs and UPDATEs
+	 */
+	function NamespaceConflictChecker( &$db ) {
+		$this->db =& $db;
+	}
+
+	/**
+	 * Check every namespace known to the content language.
+	 *
+	 * @param bool $fix whether to rename conflicting pages
+	 * @param string $suffix text appended to a title when the target exists
+	 * @return bool true if every namespace checked out (or was fixed)
+	 */
+	function checkAll( $fix, $suffix = '' ) {
+		global $wgContLang;
+		$spaces = $wgContLang->getNamespaces();
+		$ok = true;
+		foreach( $spaces as $ns => $name ) {
+			# Call first, then AND, so that every namespace is still
+			# checked after an earlier one has failed.
+			$ok = $this->checkNamespace( $ns, $name, $fix, $suffix ) && $ok;
+		}
+		return $ok;
+	}
+
+	/**
+	 * Report (and optionally resolve) the conflicts for one namespace.
+	 *
+	 * @param int $ns namespace index
+	 * @param string $name namespace name; '' (main namespace) is skipped
+	 * @param bool $fix whether to rename conflicting pages
+	 * @param string $suffix text appended when the target title is taken
+	 * @return bool false if any conflict could not be resolved automatically
+	 */
+	function checkNamespace( $ns, $name, $fix, $suffix = '' ) {
+		echo "Checking namespace $ns: \"$name\"\n";
+		if( $name == '' ) {
+			echo "... skipping article namespace\n";
+			return true;
+		}
+
+		$conflicts = $this->getConflicts( $ns, $name );
+		$count = count( $conflicts );
+		if( $count == 0 ) {
+			echo "... no conflicts detected!\n";
+			return true;
+		}
+
+		echo "... $count conflicts detected:\n";
+		$ok = true;
+		foreach( $conflicts as $row ) {
+			$resolvable = $this->reportConflict( $row, $suffix );
+			$ok = $ok && $resolvable;
+			# An unresolvable row is only touched when a suffix was given.
+			if( $fix && ( $resolvable || $suffix != '' ) ) {
+				$ok = $this->resolveConflict( $row, $resolvable, $suffix ) && $ok;
+			}
+		}
+		return $ok;
+	}
+
+	/**
+	 * @fixme: do this for reals
+	 */
+	function checkPrefix( $key, $prefix, $fix, $suffix = '' ) {
+		echo "Checking prefix \"$prefix\" vs namespace $key\n";
+		return $this->checkNamespace( $key, $prefix, $fix, $suffix );
+	}
+
+	/**
+	 * Fetch main-namespace rows whose title begins with "$name:".
+	 *
+	 * @param int $ns namespace index the rows should be moved to
+	 * @param string $name namespace name used as the title prefix
+	 * @return array row objects with id, oldtitle, namespace and title
+	 */
+	function getConflicts( $ns, $name ) {
+		$page = $this->newSchema() ? 'page' : 'cur';
+		$table = $this->db->tableName( $page );
+
+		# $ns is interpolated straight into the SQL text below.
+		$ns = intval( $ns );
+
+		$prefix = $this->db->strencode( $name );
+		# Escape both LIKE wildcards; the original escaped only '_', so a
+		# '%' in the namespace name would have matched arbitrary text.
+		$likeprefix = str_replace(
+			array( '%', '_' ),
+			array( '\\%', '\\_' ),
+			$prefix );
+
+		# NOTE(review): TRIM(LEADING ...) removes every repetition of the
+		# prefix, so "Foo:Foo:Bar" would become "Bar" - confirm intended.
+		$sql = "SELECT {$page}_id AS id,
+ {$page}_title AS oldtitle,
+ $ns AS namespace,
+ TRIM(LEADING '$prefix:' FROM {$page}_title) AS title
+ FROM {$table}
+ WHERE {$page}_namespace=0
+ AND {$page}_title LIKE '$likeprefix:%'";
+
+		$result = $this->db->query( $sql, 'NamespaceConflictChecker::getConflicts' );
+
+		$set = array();
+		while( $row = $this->db->fetchObject( $result ) ) {
+			$set[] = $row;
+		}
+		$this->db->freeResult( $result );
+
+		return $set;
+	}
+
+	/**
+	 * Print one conflict and say whether the target title is free.
+	 *
+	 * @param object $row row from getConflicts()
+	 * @param string $suffix unused here; kept for interface compatibility
+	 * @return bool true if no page exists at the target title
+	 */
+	function reportConflict( $row, $suffix ) {
+		$newTitle = Title::makeTitle( $row->namespace, $row->title );
+		printf( "... %d (0,\"%s\") -> (%d,\"%s\") [[%s]]\n",
+			$row->id,
+			$row->oldtitle,
+			$row->namespace,
+			$row->title,
+			$newTitle->getPrefixedText() );
+
+		$id = $newTitle->getArticleId();
+		if( $id ) {
+			echo "... *** cannot resolve automatically; page exists with ID $id ***\n";
+			return false;
+		} else {
+			return true;
+		}
+	}
+
+	/**
+	 * Move a conflicting row into its namespace.
+	 *
+	 * @param object $row row from getConflicts(); title is modified in place
+	 * @param bool $resolvable result of reportConflict() for this row
+	 * @param string $suffix appended to the title when not resolvable
+	 * @return bool always true
+	 */
+	function resolveConflict( $row, $resolvable, $suffix ) {
+		if( !$resolvable ) {
+			$row->title .= $suffix;
+			$title = Title::makeTitle( $row->namespace, $row->title );
+			echo "... *** using suffixed form [[" . $title->getPrefixedText() . "]] ***\n";
+		}
+		$tables = $this->newSchema()
+			? array( 'page' )
+			: array( 'cur', 'old' );
+		foreach( $tables as $table ) {
+			$this->resolveConflictOn( $row, $table );
+		}
+		return true;
+	}
+
+	/**
+	 * Rewrite namespace and title on one table for the given row.
+	 *
+	 * @param object $row row from getConflicts()
+	 * @param string $table table name, also used as the column prefix
+	 * @return bool always true
+	 */
+	function resolveConflictOn( $row, $table ) {
+		$fname = 'NamespaceConflictChecker::resolveConflictOn';
+		echo "... resolving on $table... ";
+		$this->db->update( $table,
+			array(
+				"{$table}_namespace" => $row->namespace,
+				"{$table}_title" => $row->title,
+			),
+			array(
+				"{$table}_namespace" => 0,
+				"{$table}_title" => $row->oldtitle,
+			),
+			$fname );
+		echo "ok.\n";
+		return true;
+	}
+
+	/**
+	 * @return bool true when the Revision class exists, which this script
+	 *  takes to mean the page/revision schema rather than cur/old
+	 */
+	function newSchema() {
+		return class_exists( 'Revision' );
+	}
+}
+
+
+
+
+$wgTitle = Title::newFromText( 'Namespace title conflict cleanup script' );
+
+$fix = isset( $options['fix'] );
+$suffix = isset( $options['suffix'] ) ? $options['suffix'] : '';
+$prefix = isset( $options['prefix'] ) ? $options['prefix'] : '';
+$key = isset( $options['key'] ) ? intval( $options['key'] ) : 0;
+$dbw =& wfGetDB( DB_MASTER );
+$duper = new NamespaceConflictChecker( $dbw );
+
+if( $prefix ) {
+	$retval = $duper->checkPrefix( $key, $prefix, $fix, $suffix );
+} else {
+	$retval = $duper->checkAll( $fix, $suffix );
+}
+
+if( $retval ) {
+	echo "\nLooks good!\n";
+	exit( 0 );
+} else {
+	echo "\nOh noeees\n";
+	exit( -1 );
+}
+
+?>
diff --git a/maintenance/nukePage.inc b/maintenance/nukePage.inc
new file mode 100644
index 00000000..921faba6
--- /dev/null
+++ b/maintenance/nukePage.inc
@@ -0,0 +1,80 @@
+<?php
+
+/**
+ * Support functions for the nukeArticle script
+ *
+ * @package MediaWiki
+ * @subpackage Maintenance
+ * @author Rob Church <robchur@gmail.com>
+ */
+
+require_once( 'purgeOldText.inc' );
+
+/**
+ * Look up a page by title and, if requested, delete its page row, its
+ * recent changes entries and its revisions.
+ *
+ * @param string $name page title as text
+ * @param bool $delete when false, only report what was found
+ */
+function NukePage( $name, $delete = false ) {
+
+	$dbw =& wfGetDB( DB_MASTER );
+	$dbw->begin();
+
+	$tbl_pag = $dbw->tableName( 'page' );
+	$tbl_rec = $dbw->tableName( 'recentchanges' );
+	$tbl_rev = $dbw->tableName( 'revision' );
+
+	# Get page ID
+	echo( "Searching for \"$name\"..." );
+	$title = Title::newFromText( $name );
+	# A valid title that has no page yields ID 0; treat that as
+	# "not found" instead of announcing and deleting "ID 0".
+	$id = $title ? intval( $title->getArticleID() ) : 0;
+	if( !$id ) {
+		echo( "not found in database.\n" );
+		$dbw->commit();
+		return;
+	}
+
+	$real = $title->getPrefixedText();
+	echo( "found \"$real\" with ID $id.\n" );
+
+	# Get corresponding revisions
+	echo( "Searching for revisions..." );
+	# Initialise the list: a page without revisions previously left
+	# $revs undefined when it reached count().
+	$revs = array();
+	$res = $dbw->query( "SELECT rev_id FROM $tbl_rev WHERE rev_page = $id" );
+	while( $row = $dbw->fetchObject( $res ) ) {
+		$revs[] = $row->rev_id;
+	}
+	$count = count( $revs );
+	echo( "found $count.\n" );
+
+	# Delete the page record and associated recent changes entries
+	if( $delete ) {
+		echo( "Deleting page record..." );
+		$dbw->query( "DELETE FROM $tbl_pag WHERE page_id = $id" );
+		echo( "done.\n" );
+		echo( "Cleaning up recent changes..." );
+		$dbw->query( "DELETE FROM $tbl_rec WHERE rc_cur_id = $id" );
+		echo( "done.\n" );
+	}
+
+	$dbw->commit();
+
+	# Delete revisions as appropriate; DeleteRevisions() opens its own
+	# transaction, so this happens after the commit above.
+	if( $delete && $count ) {
+		echo( "Deleting revisions..." );
+		DeleteRevisions( $revs );
+		echo( "done.\n" );
+		PurgeRedundantText( true );
+	}
+
+}
+
+/**
+ * Delete the given revision rows in a single transaction.
+ *
+ * @param array $ids revision IDs; an empty list is a no-op
+ */
+function DeleteRevisions( $ids ) {
+
+	# Force integers (the list goes straight into SQL) and bail out
+	# on an empty list, which would otherwise produce "IN ( )".
+	$ids = array_map( 'intval', (array)$ids );
+	if( !count( $ids ) ) {
+		return;
+	}
+
+	$dbw =& wfGetDB( DB_MASTER );
+	$dbw->begin();
+
+	$tbl_rev = $dbw->tableName( 'revision' );
+
+	$set = implode( ', ', $ids );
+	$dbw->query( "DELETE FROM $tbl_rev WHERE rev_id IN ( $set )" );
+
+	$dbw->commit();
+
+}
+
+?>
\ No newline at end of file
diff --git a/maintenance/nukePage.php b/maintenance/nukePage.php
new file mode 100644
index 00000000..b5c3f283
--- /dev/null
+++ b/maintenance/nukePage.php
@@ -0,0 +1,30 @@
+<?php
+
+/**
+ * Command-line entry point: erase a single page record.
+ * The operation is irreversible (standard undelete cannot restore it)
+ * and link tables are not updated.
+ *
+ * @package MediaWiki
+ * @subpackage Maintenance
+ * @author Rob Church <robchur@gmail.com>
+ */
+
+require_once( 'commandLine.inc' );
+require_once( 'nukePage.inc' );
+
+/** Print the command-line usage summary */
+function ShowUsage() {
+	print "Remove a page record from the database.\n\n";
+	print "Usage: php nukePage.php <title>\n\n";
+	print " <title> : Page title; spaces escaped with underscores\n\n";
+}
+
+print "Erase Page Record\n\n";
+
+# The first positional argument is the page title; without it, show help.
+if( !isset( $args[0] ) ) {
+	ShowUsage();
+} else {
+	NukePage( $args[0], true );
+}
+
+?>
\ No newline at end of file
diff --git a/maintenance/oracle/archives/patch-trackbacks.sql b/maintenance/oracle/archives/patch-trackbacks.sql
new file mode 100644
index 00000000..15d4eef1
--- /dev/null
+++ b/maintenance/oracle/archives/patch-trackbacks.sql
@@ -0,0 +1,10 @@
+-- Adds the trackbacks table together with a sequence.
+-- NOTE(review): trackbacks_id_seq presumably supplies tb_id values;
+-- nothing in this patch wires the two together - confirm in the DB layer.
+CREATE SEQUENCE trackbacks_id_seq;
+CREATE TABLE trackbacks (
+  tb_id NUMBER PRIMARY KEY,
+  -- Rows are removed when the referenced page row is deleted.
+  tb_page NUMBER(8) REFERENCES page(page_id) ON DELETE CASCADE,
+  tb_title VARCHAR(255) NOT NULL,
+  tb_url VARCHAR(255) NOT NULL,
+  tb_ex CLOB,
+  tb_name VARCHAR(255)
+);
+-- Despite its name, this index covers tb_page only.
+CREATE INDEX tb_name_page_idx ON trackbacks(tb_page);
diff --git a/maintenance/oracle/archives/patch-transcache.sql b/maintenance/oracle/archives/patch-transcache.sql
new file mode 100644
index 00000000..62ad2c7d
--- /dev/null
+++ b/maintenance/oracle/archives/patch-transcache.sql
@@ -0,0 +1,5 @@
+-- Adds the transcache table: one row per URL (UNIQUE), with the
+-- cached contents and a timestamp.
+CREATE TABLE transcache (
+  tc_url VARCHAR2(255) NOT NULL UNIQUE,
+  tc_contents CLOB,
+  tc_time TIMESTAMP NOT NULL
+);
diff --git a/maintenance/oracle/interwiki.sql b/maintenance/oracle/interwiki.sql
new file mode 100644
index 00000000..09d01c64
--- /dev/null
+++ b/maintenance/oracle/interwiki.sql
@@ -0,0 +1,178 @@
+-- Based more or less on the public interwiki map from MeatballWiki
+-- Default interwiki prefixes...
+ +CALL add_interwiki('abbenormal','http://www.ourpla.net/cgi-bin/pikie.cgi?$1',0); +CALL add_interwiki('acadwiki','http://xarch.tu-graz.ac.at/autocad/wiki/$1',0); +CALL add_interwiki('acronym','http://www.acronymfinder.com/af-query.asp?String=exact&Acronym=$1',0); +CALL add_interwiki('advogato','http://www.advogato.org/$1',0); +CALL add_interwiki('aiwiki','http://www.ifi.unizh.ch/ailab/aiwiki/aiw.cgi?$1',0); +CALL add_interwiki('alife','http://news.alife.org/wiki/index.php?$1',0); +CALL add_interwiki('annotation','http://bayle.stanford.edu/crit/nph-med.cgi/$1',0); +CALL add_interwiki('annotationwiki','http://www.seedwiki.com/page.cfm?wikiid=368&doc=$1',0); +CALL add_interwiki('arxiv','http://www.arxiv.org/abs/$1',0); +CALL add_interwiki('aspienetwiki','http://aspie.mela.de/Wiki/index.php?title=$1',0); +CALL add_interwiki('bemi','http://bemi.free.fr/vikio/index.php?$1',0); +CALL add_interwiki('benefitswiki','http://www.benefitslink.com/cgi-bin/wiki.cgi?$1',0); +CALL add_interwiki('brasilwiki','http://rio.ifi.unizh.ch/brasilienwiki/index.php/$1',0); +CALL add_interwiki('bridgeswiki','http://c2.com/w2/bridges/$1',0); +CALL add_interwiki('c2find','http://c2.com/cgi/wiki?FindPage&value=$1',0); +CALL add_interwiki('cache','http://www.google.com/search?q=cache:$1',0); +CALL add_interwiki('ciscavate','http://ciscavate.org/index.php/$1',0); +CALL add_interwiki('cliki','http://ww.telent.net/cliki/$1',0); +CALL add_interwiki('cmwiki','http://www.ourpla.net/cgi-bin/wiki.pl?$1',0); +CALL add_interwiki('codersbase','http://www.codersbase.com/$1',0); +CALL add_interwiki('commons','http://commons.wikimedia.org/wiki/$1',0); +CALL add_interwiki('consciousness','http://teadvus.inspiral.org/',0); +CALL add_interwiki('corpknowpedia','http://corpknowpedia.org/wiki/index.php/$1',0); +CALL add_interwiki('creationmatters','http://www.ourpla.net/cgi-bin/wiki.pl?$1',0); +CALL add_interwiki('dejanews','http://www.deja.com/=dnc/getdoc.xp?AN=$1',0); +CALL 
add_interwiki('demokraatia','http://wiki.demokraatia.ee/',0); +CALL add_interwiki('dictionary','http://www.dict.org/bin/Dict?Database=*&Form=Dict1&Strategy=*&Query=$1',0); +CALL add_interwiki('disinfopedia','http://www.disinfopedia.org/wiki.phtml?title=$1',0); +CALL add_interwiki('diveintoosx','http://diveintoosx.org/$1',0); +CALL add_interwiki('docbook','http://docbook.org/wiki/moin.cgi/$1',0); +CALL add_interwiki('dolphinwiki','http://www.object-arts.com/wiki/html/Dolphin/$1',0); +CALL add_interwiki('drumcorpswiki','http://www.drumcorpswiki.com/index.php/$1',0); +CALL add_interwiki('dwjwiki','http://www.suberic.net/cgi-bin/dwj/wiki.cgi?$1',0); +CALL add_interwiki('eĉei','http://www.ikso.net/cgi-bin/wiki.pl?$1',0); +CALL add_interwiki('echei','http://www.ikso.net/cgi-bin/wiki.pl?$1',0); +CALL add_interwiki('ecxei','http://www.ikso.net/cgi-bin/wiki.pl?$1',0); +CALL add_interwiki('efnetceewiki','http://purl.net/wiki/c/$1',0); +CALL add_interwiki('efnetcppwiki','http://purl.net/wiki/cpp/$1',0); +CALL add_interwiki('efnetpythonwiki','http://purl.net/wiki/python/$1',0); +CALL add_interwiki('efnetxmlwiki','http://purl.net/wiki/xml/$1',0); +CALL add_interwiki('eljwiki','http://elj.sourceforge.net/phpwiki/index.php/$1',0); +CALL add_interwiki('emacswiki','http://www.emacswiki.org/cgi-bin/wiki.pl?$1',0); +CALL add_interwiki('elibre','http://enciclopedia.us.es/index.php/$1',0); +CALL add_interwiki('eokulturcentro','http://esperanto.toulouse.free.fr/wakka.php?wiki=$1',0); +CALL add_interwiki('evowiki','http://www.evowiki.org/index.php/$1',0); +CALL add_interwiki('finalempire','http://final-empire.sourceforge.net/cgi-bin/wiki.pl?$1',0); +CALL add_interwiki('firstwiki','http://firstwiki.org/index.php/$1',0); +CALL add_interwiki('foldoc','http://www.foldoc.org/foldoc/foldoc.cgi?$1',0); +CALL add_interwiki('foxwiki','http://fox.wikis.com/wc.dll?Wiki~$1',0); +CALL add_interwiki('fr.be','http://fr.wikinations.be/$1',0); +CALL 
add_interwiki('fr.ca','http://fr.ca.wikinations.org/$1',0); +CALL add_interwiki('fr.fr','http://fr.fr.wikinations.org/$1',0); +CALL add_interwiki('fr.org','http://fr.wikinations.org/$1',0); +CALL add_interwiki('freebsdman','http://www.FreeBSD.org/cgi/man.cgi?apropos=1&query=$1',0); +CALL add_interwiki('gamewiki','http://gamewiki.org/wiki/index.php/$1',0); +CALL add_interwiki('gej','http://www.esperanto.de/cgi-bin/aktivikio/wiki.pl?$1',0); +CALL add_interwiki('gentoo-wiki','http://gentoo-wiki.com/$1',0); +CALL add_interwiki('globalvoices','http://cyber.law.harvard.edu/dyn/globalvoices/wiki/$1',0); +CALL add_interwiki('gmailwiki','http://www.gmailwiki.com/index.php/$1',0); +CALL add_interwiki('google','http://www.google.com/search?q=$1',0); +CALL add_interwiki('googlegroups','http://groups.google.com/groups?q=$1',0); +CALL add_interwiki('gotamac','http://www.got-a-mac.org/$1',0); +CALL add_interwiki('greencheese','http://www.greencheese.org/$1',0); +CALL add_interwiki('hammondwiki','http://www.dairiki.org/HammondWiki/index.php3?$1',0); +CALL add_interwiki('haribeau','http://wiki.haribeau.de/cgi-bin/wiki.pl?$1',0); +CALL add_interwiki('hewikisource','http://he.wikisource.org/wiki/$1',1); +CALL add_interwiki('herzkinderwiki','http://www.herzkinderinfo.de/Mediawiki/index.php/$1',0); +CALL add_interwiki('hrwiki','http://www.hrwiki.org/index.php/$1',0); +CALL add_interwiki('iawiki','http://www.IAwiki.net/$1',0); +CALL add_interwiki('imdb','http://us.imdb.com/Title?$1',0); +CALL add_interwiki('infosecpedia','http://www.infosecpedia.org/pedia/index.php/$1',0); +CALL add_interwiki('jargonfile','http://sunir.org/apps/meta.pl?wiki=JargonFile&redirect=$1',0); +CALL add_interwiki('jefo','http://www.esperanto-jeunes.org/vikio/index.php?$1',0); +CALL add_interwiki('jiniwiki','http://www.cdegroot.com/cgi-bin/jini?$1',0); +CALL add_interwiki('jspwiki','http://www.ecyrd.com/JSPWiki/Wiki.jsp?page=$1',0); +CALL add_interwiki('kerimwiki','http://wiki.oxus.net/$1',0); +CALL 
add_interwiki('kmwiki','http://www.voght.com/cgi-bin/pywiki?$1',0); +CALL add_interwiki('knowhow','http://www2.iro.umontreal.ca/~paquetse/cgi-bin/wiki.cgi?$1',0); +CALL add_interwiki('lanifexwiki','http://opt.lanifex.com/cgi-bin/wiki.pl?$1',0); +CALL add_interwiki('lasvegaswiki','http://wiki.gmnow.com/index.php/$1',0); +CALL add_interwiki('linuxwiki','http://www.linuxwiki.de/$1',0); +CALL add_interwiki('lojban','http://www.lojban.org/tiki/tiki-index.php?page=$1',0); +CALL add_interwiki('lqwiki','http://wiki.linuxquestions.org/wiki/$1',0); +CALL add_interwiki('lugkr','http://lug-kr.sourceforge.net/cgi-bin/lugwiki.pl?$1',0); +CALL add_interwiki('lutherwiki','http://www.lutheranarchives.com/mw/index.php/$1',0); +CALL add_interwiki('mathsongswiki','http://SeedWiki.com/page.cfm?wikiid=237&doc=$1',0); +CALL add_interwiki('mbtest','http://www.usemod.com/cgi-bin/mbtest.pl?$1',0); +CALL add_interwiki('meatball','http://www.usemod.com/cgi-bin/mb.pl?$1',0); +CALL add_interwiki('mediazilla','http://bugzilla.wikipedia.org/$1',1); +CALL add_interwiki('memoryalpha','http://www.memory-alpha.org/en/index.php/$1',0); +CALL add_interwiki('metaweb','http://www.metaweb.com/wiki/wiki.phtml?title=$1',0); +CALL add_interwiki('metawiki','http://sunir.org/apps/meta.pl?$1',0); +CALL add_interwiki('metawikipedia','http://meta.wikimedia.org/wiki/$1',0); +CALL add_interwiki('moinmoin','http://purl.net/wiki/moin/$1',0); +CALL add_interwiki('mozillawiki','http://wiki.mozilla.org/index.php/$1',0); +CALL add_interwiki('muweb','http://www.dunstable.com/scripts/MuWebWeb?$1',0); +CALL add_interwiki('netvillage','http://www.netbros.com/?$1',0); +CALL add_interwiki('oeis','http://www.research.att.com/cgi-bin/access.cgi/as/njas/sequences/eisA.cgi?Anum=$1',0); +CALL add_interwiki('openfacts','http://openfacts.berlios.de/index.phtml?title=$1',0); +CALL add_interwiki('openwiki','http://openwiki.com/?$1',0); +CALL add_interwiki('opera7wiki','http://nontroppo.org/wiki/$1',0); +CALL 
add_interwiki('orgpatterns','http://www.bell-labs.com/cgi-user/OrgPatterns/OrgPatterns?$1',0); +CALL add_interwiki('osi reference model','http://wiki.tigma.ee/',0); +CALL add_interwiki('pangalacticorg','http://www.pangalactic.org/Wiki/$1',0); +CALL add_interwiki('personaltelco','http://www.personaltelco.net/index.cgi/$1',0); +CALL add_interwiki('patwiki','http://gauss.ffii.org/$1',0); +CALL add_interwiki('phpwiki','http://phpwiki.sourceforge.net/phpwiki/index.php?$1',0); +CALL add_interwiki('pikie','http://pikie.darktech.org/cgi/pikie?$1',0); +CALL add_interwiki('pmeg','http://www.bertilow.com/pmeg/$1.php',0); +CALL add_interwiki('ppr','http://c2.com/cgi/wiki?$1',0); +CALL add_interwiki('purlnet','http://purl.oclc.org/NET/$1',0); +CALL add_interwiki('pythoninfo','http://www.python.org/cgi-bin/moinmoin/$1',0); +CALL add_interwiki('pythonwiki','http://www.pythonwiki.de/$1',0); +CALL add_interwiki('pywiki','http://www.voght.com/cgi-bin/pywiki?$1',0); +CALL add_interwiki('raec','http://www.raec.clacso.edu.ar:8080/raec/Members/raecpedia/$1',0); +CALL add_interwiki('revo','http://purl.org/NET/voko/revo/art/$1.html',0); +CALL add_interwiki('rfc','http://www.rfc-editor.org/rfc/rfc$1.txt',0); +CALL add_interwiki('s23wiki','http://is-root.de/wiki/index.php/$1',0); +CALL add_interwiki('scoutpedia','http://www.scoutpedia.info/index.php/$1',0); +CALL add_interwiki('seapig','http://www.seapig.org/$1',0); +CALL add_interwiki('seattlewiki','http://seattlewiki.org/wiki/$1',0); +CALL add_interwiki('seattlewireless','http://seattlewireless.net/?$1',0); +CALL add_interwiki('seeds','http://www.IslandSeeds.org/wiki/$1',0); +CALL add_interwiki('senseislibrary','http://senseis.xmp.net/?$1',0); +CALL add_interwiki('shakti','http://cgi.algonet.se/htbin/cgiwrap/pgd/ShaktiWiki/$1',0); +CALL add_interwiki('slashdot','http://slashdot.org/article.pl?sid=$1',0); +CALL add_interwiki('smikipedia','http://www.smikipedia.org/$1',0); +CALL add_interwiki('sockwiki','http://wiki.socklabs.com/$1',0); 
+CALL add_interwiki('sourceforge','http://sourceforge.net/$1',0); +CALL add_interwiki('squeak','http://minnow.cc.gatech.edu/squeak/$1',0); +CALL add_interwiki('strikiwiki','http://ch.twi.tudelft.nl/~mostert/striki/teststriki.pl?$1',0); +CALL add_interwiki('susning','http://www.susning.nu/$1',0); +CALL add_interwiki('svgwiki','http://www.protocol7.com/svg-wiki/default.asp?$1',0); +CALL add_interwiki('tavi','http://tavi.sourceforge.net/$1',0); +CALL add_interwiki('tejo','http://www.tejo.org/vikio/$1',0); +CALL add_interwiki('terrorwiki','http://www.liberalsagainstterrorism.com/wiki/index.php/$1',0); +CALL add_interwiki('tmbw','http://www.tmbw.net/wiki/index.php/$1',0); +CALL add_interwiki('tmnet','http://www.technomanifestos.net/?$1',0); +CALL add_interwiki('tmwiki','http://www.EasyTopicMaps.com/?page=$1',0); +CALL add_interwiki('turismo','http://www.tejo.org/turismo/$1',0); +CALL add_interwiki('theopedia','http://www.theopedia.com/$1',0); +CALL add_interwiki('twiki','http://twiki.org/cgi-bin/view/$1',0); +CALL add_interwiki('twistedwiki','http://purl.net/wiki/twisted/$1',0); +CALL add_interwiki('uea','http://www.tejo.org/uea/$1',0); +CALL add_interwiki('unreal','http://wiki.beyondunreal.com/wiki/$1',0); +CALL add_interwiki('ursine','http://ursine.ca/$1',0); +CALL add_interwiki('usej','http://www.tejo.org/usej/$1',0); +CALL add_interwiki('usemod','http://www.usemod.com/cgi-bin/wiki.pl?$1',0); +CALL add_interwiki('visualworks','http://wiki.cs.uiuc.edu/VisualWorks/$1',0); +CALL add_interwiki('warpedview','http://www.warpedview.com/index.php/$1',0); +CALL add_interwiki('webdevwikinl','http://www.promo-it.nl/WebDevWiki/index.php?page=$1',0); +CALL add_interwiki('webisodes','http://www.webisodes.org/$1',0); +CALL add_interwiki('webseitzwiki','http://webseitz.fluxent.com/wiki/$1',0); +CALL add_interwiki('why','http://clublet.com/c/c/why?$1',0); +CALL add_interwiki('wiki','http://c2.com/cgi/wiki?$1',0); +CALL 
add_interwiki('wikia','http://www.wikia.com/wiki/index.php/$1',0); +CALL add_interwiki('wikibooks','http://en.wikibooks.org/wiki/$1',1); +CALL add_interwiki('wikicities','http://www.wikicities.com/index.php/$1',0); +CALL add_interwiki('wikif1','http://www.wikif1.org/$1',0); +CALL add_interwiki('wikinfo','http://www.wikinfo.org/wiki.php?title=$1',0); +CALL add_interwiki('wikimedia','http://wikimediafoundation.org/wiki/$1',0); +CALL add_interwiki('wikiquote','http://en.wikiquote.org/wiki/$1',1); +CALL add_interwiki('wikinews','http://en.wikinews.org/wiki/$1',0); +CALL add_interwiki('wikisource','http://sources.wikipedia.org/wiki/$1',1); +CALL add_interwiki('wikispecies','http://species.wikipedia.org/wiki/$1',1); +CALL add_interwiki('wikitravel','http://wikitravel.org/en/$1',0); +CALL add_interwiki('wikiworld','http://WikiWorld.com/wiki/index.php/$1',0); +CALL add_interwiki('wiktionary','http://en.wiktionary.org/wiki/$1',1); +CALL add_interwiki('wlug','http://www.wlug.org.nz/$1',0); +CALL add_interwiki('wlwiki','http://winslowslair.supremepixels.net/wiki/index.php/$1',0); +CALL add_interwiki('ypsieyeball','http://sknkwrks.dyndns.org:1957/writewiki/wiki.pl?$1',0); +CALL add_interwiki('zwiki','http://www.zwiki.org/$1',0); +CALL add_interwiki('zzz wiki','http://wiki.zzz.ee/',0); +CALL add_interwiki('wikt','http://en.wiktionary.org/wiki/$1',1); + diff --git a/maintenance/oracle/tables.sql b/maintenance/oracle/tables.sql new file mode 100644 index 00000000..6733f950 --- /dev/null +++ b/maintenance/oracle/tables.sql @@ -0,0 +1,333 @@ +-- SQL to create the initial tables for the MediaWiki database. +-- This is read and executed by the install script; you should +-- not have to run it by itself unless doing a manual install. 
+
+-- NOTE(review): the sequences in this file are presumably used by the
+-- Oracle database layer to generate primary key values; no trigger or
+-- default in this file connects them to their tables - confirm.
+CREATE SEQUENCE user_user_id_seq;
+
+-- "user" is quoted throughout this file wherever the table is named.
+CREATE TABLE "user" (
+  user_id NUMBER(5) NOT NULL PRIMARY KEY,
+  user_name VARCHAR2(255) DEFAULT '' NOT NULL,
+  user_real_name VARCHAR2(255) DEFAULT '',
+  user_password VARCHAR2(128) DEFAULT '',
+  user_newpassword VARCHAR2(128) default '',
+  user_email VARCHAR2(255) default '',
+  user_options CLOB default '',
+  user_touched TIMESTAMP WITH TIME ZONE,
+  user_token CHAR(32) default '',
+  user_email_authenticated TIMESTAMP WITH TIME ZONE DEFAULT NULL,
+  user_email_token CHAR(32),
+  user_email_token_expires TIMESTAMP WITH TIME ZONE DEFAULT NULL
+);
+CREATE UNIQUE INDEX user_name_idx ON "user" (user_name);
+CREATE INDEX user_email_token_idx ON "user" (user_email_token);
+
+-- Group memberships; one row per (user, group) pair, removed together
+-- with the user row.
+CREATE TABLE user_groups (
+  ug_user NUMBER(5) DEFAULT '0' NOT NULL
+    REFERENCES "user" (user_id)
+    ON DELETE CASCADE,
+  ug_group VARCHAR2(16) NOT NULL,
+  CONSTRAINT user_groups_pk PRIMARY KEY (ug_user, ug_group)
+);
+CREATE INDEX user_groups_group_idx ON user_groups(ug_group);
+
+-- Keyed by either a user id or an IP string; both columns are indexed.
+CREATE TABLE user_newtalk (
+  user_id NUMBER(5) DEFAULT 0 NOT NULL,
+  user_ip VARCHAR2(40) DEFAULT '' NOT NULL
+);
+CREATE INDEX user_newtalk_id_idx ON user_newtalk(user_id);
+CREATE INDEX user_newtalk_ip_idx ON user_newtalk(user_ip);
+
+CREATE SEQUENCE page_page_id_seq;
+-- One row per page; (page_namespace, page_title) is unique.
+CREATE TABLE page (
+  page_id NUMBER(8) NOT NULL PRIMARY KEY,
+  page_namespace NUMBER(5) NOT NULL,
+  page_title VARCHAR(255) NOT NULL,
+  page_restrictions CLOB DEFAULT '',
+  page_counter NUMBER(20) DEFAULT 0 NOT NULL,
+  page_is_redirect NUMBER(1) DEFAULT 0 NOT NULL,
+  page_is_new NUMBER(1) DEFAULT 0 NOT NULL,
+  page_random NUMBER(25, 24) NOT NULL,
+  page_touched TIMESTAMP WITH TIME ZONE,
+  page_latest NUMBER(8) NOT NULL,
+  page_len NUMBER(8) DEFAULT 0
+);
+CREATE UNIQUE INDEX page_id_namespace_title_idx ON page(page_namespace, page_title);
+CREATE INDEX page_random_idx ON page(page_random);
+CREATE INDEX page_len_idx ON page(page_len);
+
+CREATE SEQUENCE rev_rev_id_val;
+-- Revisions are removed when their page row is deleted. The primary key
+-- is (rev_page, rev_id); rev_id alone is also unique via rev_id_idx.
+CREATE TABLE revision (
+  rev_id NUMBER(8) NOT NULL,
+  rev_page NUMBER(8) NOT NULL
+    REFERENCES page (page_id)
+    ON DELETE CASCADE,
+  rev_text_id NUMBER(8) NOT NULL,
+  rev_comment CLOB,
+  rev_user NUMBER(8) DEFAULT 0 NOT NULL,
+  rev_user_text VARCHAR2(255) DEFAULT '' NOT NULL,
+  rev_timestamp TIMESTAMP WITH TIME ZONE NOT NULL,
+  rev_minor_edit NUMBER(1) DEFAULT 0 NOT NULL,
+  rev_deleted NUMBER(1) DEFAULT 0 NOT NULL,
+  CONSTRAINT revision_pk PRIMARY KEY (rev_page, rev_id)
+);
+
+CREATE UNIQUE INDEX rev_id_idx ON revision(rev_id);
+CREATE INDEX rev_timestamp_idx ON revision(rev_timestamp);
+CREATE INDEX rev_page_timestamp_idx ON revision(rev_page, rev_timestamp);
+CREATE INDEX rev_user_timestamp_idx ON revision(rev_user, rev_timestamp);
+CREATE INDEX rev_usertext_timestamp_idx ON revision(rev_user_text, rev_timestamp);
+
+CREATE SEQUENCE text_old_id_val;
+
+-- NOTE(review): presumably the target of revision.rev_text_id; no
+-- foreign key declares that relationship here - confirm.
+CREATE TABLE text (
+  old_id NUMBER(8) NOT NULL,
+  old_text CLOB,
+  old_flags CLOB,
+  CONSTRAINT text_pk PRIMARY KEY (old_id)
+);
+
+-- No primary key; indexed by (ar_namespace, ar_title, ar_timestamp).
+CREATE TABLE archive (
+  ar_namespace NUMBER(5) NOT NULL,
+  ar_title VARCHAR2(255) NOT NULL,
+  ar_text CLOB,
+  ar_comment CLOB,
+  ar_user NUMBER(8),
+  ar_user_text VARCHAR2(255) NOT NULL,
+  ar_timestamp TIMESTAMP WITH TIME ZONE NOT NULL,
+  ar_minor_edit NUMBER(1) DEFAULT 0 NOT NULL,
+  ar_flags CLOB,
+  ar_rev_id NUMBER(8),
+  ar_text_id NUMBER(8)
+);
+CREATE INDEX archive_name_title_timestamp ON archive(ar_namespace,ar_title,ar_timestamp);
+
+-- The three link tables below reference page(page_id) from their
+-- *_from column and are emptied by cascade when that page is deleted.
+CREATE TABLE pagelinks (
+  pl_from NUMBER(8) NOT NULL
+    REFERENCES page(page_id)
+    ON DELETE CASCADE,
+  pl_namespace NUMBER(4) DEFAULT 0 NOT NULL,
+  pl_title VARCHAR2(255) NOT NULL
+);
+CREATE UNIQUE INDEX pl_from ON pagelinks(pl_from, pl_namespace, pl_title);
+CREATE INDEX pl_namespace ON pagelinks(pl_namespace, pl_title);
+
+CREATE TABLE imagelinks (
+  il_from NUMBER(8) NOT NULL REFERENCES page(page_id) ON DELETE CASCADE,
+  il_to VARCHAR2(255) NOT NULL
+);
+CREATE UNIQUE INDEX il_from ON imagelinks(il_from, il_to);
+CREATE INDEX il_to ON imagelinks(il_to);
+
+CREATE TABLE categorylinks (
+  cl_from NUMBER(8) NOT NULL REFERENCES page(page_id) ON DELETE CASCADE,
+  cl_to VARCHAR2(255) NOT NULL,
+  cl_sortkey VARCHAR2(86) default '',
+  cl_timestamp TIMESTAMP WITH TIME ZONE NOT NULL
+);
+CREATE UNIQUE INDEX cl_from ON categorylinks(cl_from, cl_to);
+CREATE INDEX cl_sortkey ON categorylinks(cl_to, cl_sortkey);
+CREATE INDEX cl_timestamp ON categorylinks(cl_to, cl_timestamp);
+
+--
+-- Contains a single row with some aggregate info
+-- on the state of the site.
+--
+CREATE TABLE site_stats (
+  ss_row_id NUMBER(8) NOT NULL,
+  ss_total_views NUMBER(20) default 0,
+  ss_total_edits NUMBER(20) default 0,
+  ss_good_articles NUMBER(20) default 0,
+  ss_total_pages NUMBER(20) default -1,
+  ss_users NUMBER(20) default -1,
+  ss_admins NUMBER(10) default -1
+);
+CREATE UNIQUE INDEX ss_row_id ON site_stats(ss_row_id);
+
+--
+-- Stores an ID for every time any article is visited;
+-- depending on $wgHitcounterUpdateFreq, it is
+-- periodically cleared and the page_counter column
+-- in the page table is updated for all articles
+-- that have been visited.
+--
+CREATE TABLE hitcounter (
+  hc_id NUMBER NOT NULL
+);
+
+--
+-- The internet is full of jerks, alas. Sometimes it's handy
+-- to block a vandal or troll account.
+--
+CREATE SEQUENCE ipblocks_ipb_id_val;
+CREATE TABLE ipblocks (
+  ipb_id NUMBER(8) NOT NULL,
+  ipb_address VARCHAR2(40),
+  ipb_user NUMBER(8),
+  -- Deleting the referenced user row also deletes the block rows.
+  ipb_by NUMBER(8) NOT NULL
+    REFERENCES "user" (user_id)
+    ON DELETE CASCADE,
+  ipb_reason CLOB,
+  ipb_timestamp TIMESTAMP WITH TIME ZONE NOT NULL,
+  ipb_auto NUMBER(1) DEFAULT 0 NOT NULL,
+  ipb_expiry TIMESTAMP WITH TIME ZONE,
+  CONSTRAINT ipblocks_pk PRIMARY KEY (ipb_id)
+);
+CREATE INDEX ipb_address ON ipblocks(ipb_address);
+CREATE INDEX ipb_user ON ipblocks(ipb_user);
+
+-- Keyed by img_name; rows cascade away with the referenced user row.
+CREATE TABLE image (
+  img_name VARCHAR2(255) NOT NULL,
+  img_size NUMBER(8) NOT NULL,
+  img_width NUMBER(5) NOT NULL,
+  img_height NUMBER(5) NOT NULL,
+  img_metadata CLOB,
+  img_bits NUMBER(3),
+  img_media_type VARCHAR2(10),
+  img_major_mime VARCHAR2(12) DEFAULT 'unknown',
+  img_minor_mime VARCHAR2(32) DEFAULT 'unknown',
+  img_description CLOB NOT NULL,
+  img_user NUMBER(8) NOT NULL REFERENCES "user"(user_id) ON DELETE CASCADE,
+  img_user_text VARCHAR2(255) NOT NULL,
+  img_timestamp TIMESTAMP WITH TIME ZONE,
+  CONSTRAINT image_pk PRIMARY KEY (img_name)
+);
+CREATE INDEX img_size_idx ON image(img_size);
+CREATE INDEX img_timestamp_idx ON image(img_timestamp);
+
+-- Unlike image.img_user, oi_user has no ON DELETE CASCADE.
+CREATE TABLE oldimage (
+  oi_name VARCHAR2(255) NOT NULL,
+  oi_archive_name VARCHAR2(255) NOT NULL,
+  oi_size NUMBER(8) NOT NULL,
+  oi_width NUMBER(5) NOT NULL,
+  oi_height NUMBER(5) NOT NULL,
+  oi_bits NUMBER(3) NOT NULL,
+  oi_description CLOB,
+  oi_user NUMBER(8) NOT NULL REFERENCES "user"(user_id),
+  oi_user_text VARCHAR2(255) NOT NULL,
+  oi_timestamp TIMESTAMP WITH TIME ZONE NOT NULL
+);
+CREATE INDEX oi_name ON oldimage (oi_name);
+
+CREATE SEQUENCE rc_rc_id_seq;
+CREATE TABLE recentchanges (
+  rc_id NUMBER(8) NOT NULL,
+  rc_timestamp TIMESTAMP WITH TIME ZONE,
+  rc_cur_time TIMESTAMP WITH TIME ZONE,
+  rc_user NUMBER(8) DEFAULT 0 NOT NULL,
+  rc_user_text VARCHAR2(255),
+  rc_namespace NUMBER(4) DEFAULT 0 NOT NULL,
+  rc_title VARCHAR2(255) NOT NULL,
+  rc_comment VARCHAR2(255),
+  rc_minor NUMBER(3) DEFAULT 0 NOT NULL,
+  rc_bot NUMBER(3) DEFAULT 0 NOT NULL,
+  rc_new NUMBER(3) DEFAULT 0 NOT NULL,
+  rc_cur_id NUMBER(8),
+  rc_this_oldid NUMBER(8) NOT NULL,
+  rc_last_oldid NUMBER(8) NOT NULL,
+  rc_type NUMBER(3) DEFAULT 0 NOT NULL,
+  rc_moved_to_ns NUMBER(3),
+  rc_moved_to_title VARCHAR2(255),
+  rc_patrolled NUMBER(3) DEFAULT 0 NOT NULL,
+  rc_ip VARCHAR2(40),
+  CONSTRAINT rc_pk PRIMARY KEY (rc_id)
+);
+CREATE INDEX rc_timestamp ON recentchanges (rc_timestamp);
+CREATE INDEX rc_namespace_title ON recentchanges(rc_namespace, rc_title);
+CREATE INDEX rc_cur_id ON recentchanges(rc_cur_id);
+CREATE INDEX new_name_timestamp ON recentchanges(rc_new, rc_namespace, rc_timestamp);
+CREATE INDEX rc_ip ON recentchanges(rc_ip);
+
+-- One row per (user, namespace, title); removed with the user row.
+CREATE TABLE watchlist (
+  wl_user NUMBER(8) NOT NULL
+    REFERENCES "user"(user_id)
+    ON DELETE CASCADE,
+  wl_namespace NUMBER(8) DEFAULT 0 NOT NULL,
+  wl_title VARCHAR2(255) NOT NULL,
+  wl_notificationtimestamp TIMESTAMP WITH TIME ZONE DEFAULT NULL
+);
+CREATE UNIQUE INDEX wl_user_namespace_title ON watchlist
+  (wl_user, wl_namespace, wl_title);
+CREATE INDEX wl_namespace_title ON watchlist(wl_namespace, wl_title);
+
+--
+-- Used by texvc math-rendering extension to keep track
+-- of previously-rendered items.
+--
+CREATE TABLE math (
+  math_inputhash VARCHAR2(16) NOT NULL UNIQUE,
+  math_outputhash VARCHAR2(16) NOT NULL,
+  math_html_conservativeness NUMBER(1) NOT NULL,
+  math_html CLOB,
+  math_mathml CLOB
+);
+
+--
+-- Recognized interwiki link prefixes
+--
+CREATE TABLE interwiki (
+  iw_prefix VARCHAR2(32) NOT NULL UNIQUE,
+  iw_url VARCHAR2(127) NOT NULL,
+  iw_local NUMBER(1) NOT NULL,
+  iw_trans NUMBER(1) DEFAULT 0 NOT NULL
+);
+
+-- Rows are looked up through the (qc_type, qc_value) index below.
+CREATE TABLE querycache (
+  qc_type VARCHAR2(32) NOT NULL,
+  qc_value NUMBER(5) DEFAULT 0 NOT NULL,
+  qc_namespace NUMBER(4) DEFAULT 0 NOT NULL,
+  qc_title VARCHAR2(255)
+);
+CREATE INDEX querycache_type_value ON querycache(qc_type, qc_value);
+
+--
+-- For a few generic cache operations if not using Memcached
+--
+CREATE TABLE objectcache (
+  keyname CHAR(255) DEFAULT '',
+  value CLOB,
+  exptime TIMESTAMP WITH TIME ZONE
+);
+CREATE UNIQUE INDEX oc_keyname_idx ON objectcache(keyname);
+CREATE INDEX oc_exptime_idx ON objectcache(exptime);
+
+-- log_user references "user" without a cascade rule.
+CREATE TABLE logging (
+  log_type VARCHAR2(10) NOT NULL,
+  log_action VARCHAR2(10) NOT NULL,
+  log_timestamp TIMESTAMP WITH TIME ZONE NOT NULL,
+  log_user NUMBER(8) REFERENCES "user"(user_id),
+  log_namespace NUMBER(4),
+  log_title VARCHAR2(255) NOT NULL,
+  log_comment VARCHAR2(255),
+  log_params CLOB
+);
+CREATE INDEX logging_type_name ON logging(log_type, log_timestamp);
+CREATE INDEX logging_user_time ON logging(log_user, log_timestamp);
+CREATE INDEX logging_page_time ON logging(log_namespace, log_title, log_timestamp);
+
+-- Hold group name and description
+--CREATE TABLE /*$wgDBprefix*/groups (
+-- gr_id int(5) unsigned NOT NULL auto_increment,
+-- gr_name varchar(50) NOT NULL default '',
+-- gr_description varchar(255) NOT NULL default '',
+-- gr_rights tinyblob,
+-- PRIMARY KEY (gr_id)
+--
+--) TYPE=InnoDB;
+
+-- add_user_right: looks up the user row whose user_name equals "name"
+-- and inserts a (user_id, new_right) row into user_groups. When no such
+-- user exists, a message is written with DBMS_OUTPUT instead.
+-- NOTE(review): the doubled semicolons are presumably an escape for the
+-- installer's statement splitter - confirm before "fixing" them.
+-- NOTE(review): the local variable user_id has the same name as the
+-- selected column, and user_is_missing is declared but never raised.
+CREATE OR REPLACE PROCEDURE add_user_right (name VARCHAR2, new_right VARCHAR2) AS
+  user_id "user".user_id%TYPE;;
+  user_is_missing EXCEPTION;;
+BEGIN
+  SELECT user_id INTO user_id FROM "user" WHERE user_name = name;;
+  INSERT INTO user_groups (ug_user, ug_group) VALUES(user_id, new_right);;
+EXCEPTION
+  WHEN NO_DATA_FOUND THEN
+    DBMS_OUTPUT.PUT_LINE('The specified user does not exist.');;
+END add_user_right;;
+;
+
+-- add_interwiki: inserts one interwiki row; iw_trans is not supplied,
+-- so it takes the column default of 0.
+CREATE OR REPLACE PROCEDURE add_interwiki (prefix VARCHAR2, url VARCHAR2, is_local NUMBER) AS
+BEGIN
+  INSERT INTO interwiki (iw_prefix, iw_url, iw_local) VALUES(prefix, url, is_local);;
+END add_interwiki;;
+;
\ No newline at end of file
diff --git a/maintenance/orphans.php b/maintenance/orphans.php
new file mode 100644
index 00000000..3bfa79f5
--- /dev/null
+++ b/maintenance/orphans.php
@@ -0,0 +1,207 @@
<?php
# Copyright (C) 2005 Brion Vibber <brion@pobox.com>
# http://www.mediawiki.org/
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
# http://www.gnu.org/copyleft/gpl.html

/**
 * Look for 'orphan' revisions hooked to pages which don't exist
 * And 'childless' pages with no revisions.
 * Then, kill the poor widows and orphans.
 * Man this is depressing.
 *
 * @author <brion@pobox.com>
 * @package MediaWiki
 * @subpackage Maintenance
 */

$options = array( 'fix' );

/** */
require_once( 'commandLine.inc' );
$wgTitle = Title::newFromText( 'Orphan revision cleanup script' );

checkOrphans( isset( $options['fix'] ) );
checkSeparation( isset( $options['fix'] ) );
#checkWidows( isset( $options['fix'] ) );

# ------

/**
 * Report revision rows whose rev_page matches no row in the page table,
 * and delete them when asked to.
 *
 * @param bool $fix Delete the orphan revisions instead of only listing them
 */
function checkOrphans( $fix ) {
	$dbw =& wfGetDB( DB_MASTER );
	$page = $dbw->tableName( 'page' );
	$revision = $dbw->tableName( 'revision' );

	if( $fix ) {
		$dbw->query( "LOCK TABLES $page WRITE, $revision WRITE" );
	}

	echo "Checking for orphan revision table entries... (this may take a while on a large wiki)\n";
	$result = $dbw->query( "
		SELECT *
		FROM $revision LEFT OUTER JOIN $page ON rev_page=page_id
		WHERE page_id IS NULL
	");
	$orphans = $dbw->numRows( $result );
	if( $orphans > 0 ) {
		global $wgContLang;
		echo "$orphans orphan revisions...\n";
		printf( "%10s %10s %14s %20s %s\n", 'rev_id', 'rev_page', 'rev_timestamp', 'rev_user_text', 'rev_comment' );
		while( $row = $dbw->fetchObject( $result ) ) {
			$comment = ( $row->rev_comment == '' )
				? ''
				: '(' . $wgContLang->truncate( $row->rev_comment, 40, '...' ) . ')';
			printf( "%10d %10d %14s %20s %s\n",
				$row->rev_id,
				$row->rev_page,
				$row->rev_timestamp,
				$wgContLang->truncate( $row->rev_user_text, 17, '...' ),
				$comment );
			if( $fix ) {
				$dbw->delete( 'revision', array( 'rev_id' => $row->rev_id ) );
			}
		}
		if( !$fix ) {
			echo "Run again with --fix to remove these entries automatically.\n";
		}
	} else {
		echo "No orphans! Yay!\n";
	}
	# Release the result set; the original never freed it.
	$dbw->freeResult( $result );

	if( $fix ) {
		$dbw->query( "UNLOCK TABLES" );
	}
}

/**
 * Report page rows whose page_latest matches no revision row, and delete
 * them when asked to. The call at the top of this script is commented out.
 *
 * @todo DON'T USE THIS YET! It will remove entries which have children,
 *       but which aren't properly attached (eg if page_latest is bogus
 *       but valid revisions do exist)
 *
 * @param bool $fix Delete the childless pages instead of only listing them
 */
function checkWidows( $fix ) {
	$dbw =& wfGetDB( DB_MASTER );
	$page = $dbw->tableName( 'page' );
	$revision = $dbw->tableName( 'revision' );

	if( $fix ) {
		$dbw->query( "LOCK TABLES $page WRITE, $revision WRITE" );
	}

	echo "\nChecking for childless page table entries... (this may take a while on a large wiki)\n";
	$result = $dbw->query( "
		SELECT *
		FROM $page LEFT OUTER JOIN $revision ON page_latest=rev_id
		WHERE rev_id IS NULL
	");
	$widows = $dbw->numRows( $result );
	if( $widows > 0 ) {
		global $wgContLang;
		echo "$widows childless pages...\n";
		printf( "%10s %11s %2s %s\n", 'page_id', 'page_latest', 'ns', 'page_title' );
		while( $row = $dbw->fetchObject( $result ) ) {
			printf( "%10d %11d %2d %s\n",
				$row->page_id,
				$row->page_latest,
				$row->page_namespace,
				$row->page_title );
			if( $fix ) {
				$dbw->delete( 'page', array( 'page_id' => $row->page_id ) );
			}
		}
		if( !$fix ) {
			echo "Run again with --fix to remove these entries automatically.\n";
		}
	} else {
		echo "No childless pages! Yay!\n";
	}
	# Release the result set; the original never freed it.
	$dbw->freeResult( $result );

	if( $fix ) {
		$dbw->query( "UNLOCK TABLES" );
	}
}


/**
 * Report pages whose page_latest revision does not carry the newest
 * rev_timestamp among that page's revisions, and when asked to, point the
 * page at the revision that does.
 *
 * Issues one MAX(rev_timestamp) query per page row.
 *
 * @param bool $fix Update the affected pages instead of only listing them
 */
function checkSeparation( $fix ) {
	$dbw =& wfGetDB( DB_MASTER );
	$page     = $dbw->tableName( 'page' );
	$revision = $dbw->tableName( 'revision' );
	$text     = $dbw->tableName( 'text' );

	if( $fix ) {
		$dbw->query( "LOCK TABLES $page WRITE, $revision WRITE, $text WRITE" );
	}

	echo "\nChecking for pages whose page_latest links are incorrect... (this may take a while on a large wiki)\n";
	$result = $dbw->query( "
		SELECT *
		FROM $page LEFT OUTER JOIN $revision ON page_latest=rev_id
	");
	$found = 0;
	while( $row = $dbw->fetchObject( $result ) ) {
		$result2 = $dbw->query( "
			SELECT MAX(rev_timestamp) as max_timestamp
			FROM $revision
			WHERE rev_page=$row->page_id
		" );
		$row2 = $dbw->fetchObject( $result2 );
		$dbw->freeResult( $result2 );
		if( !$row2 ) {
			# Not expected: an aggregate query should always yield one row.
			echo "Could not determine the latest revision timestamp for page $row->page_id\n";
			continue;
		}
		if( $row->rev_timestamp == $row2->max_timestamp ) {
			continue;
		}

		if( $found == 0 ) {
			printf( "%10s %10s %14s %14s\n",
				'page_id', 'rev_id', 'timestamp', 'max timestamp' );
		}
		++$found;
		printf( "%10d %10d %14s %14s\n",
			$row->page_id,
			$row->page_latest,
			$row->rev_timestamp,
			$row2->max_timestamp );
		if( $fix ) {
			# Look up the revision that carries the newest timestamp
			$maxId = $dbw->selectField(
				'revision',
				'rev_id',
				array(
					'rev_page'      => $row->page_id,
					'rev_timestamp' => $row2->max_timestamp ) );
			if( !$maxId ) {
				echo "... no revision found with timestamp $row2->max_timestamp; skipping\n";
				continue;
			}
			echo "... updating to revision $maxId\n";
			$maxRev = Revision::newFromId( $maxId );
			if( !$maxRev ) {
				# Do not pass an unusable revision to updateRevisionOn()
				echo "... could not load revision $maxId; skipping\n";
				continue;
			}
			$title = Title::makeTitle( $row->page_namespace, $row->page_title );
			$article = new Article( $title );
			$article->updateRevisionOn( $dbw, $maxRev );
		}
	}
	# Release the outer result set; the original never freed it.
	$dbw->freeResult( $result );

	if( $found ) {
		echo "Found $found pages with incorrect latest revision.\n";
	} else {
		echo "No pages with incorrect latest revision. Yay!\n";
	}
	if( !$fix && $found > 0 ) {
		# --fix repoints page_latest here; it does not remove anything.
		echo "Run again with --fix to update these pages automatically.\n";
	}

	if( $fix ) {
		$dbw->query( "UNLOCK TABLES" );
	}
}

?>
\ No newline at end of file
diff --git a/maintenance/ourusers.php b/maintenance/ourusers.php
new file mode 100644
index 00000000..0d625571
--- /dev/null
+++ b/maintenance/ourusers.php
@@ -0,0 +1,121 @@
<?php
/**
 * Print the MySQL GRANT statements for the wikiuser and wikiadmin accounts,
 * once per known host. Nothing is executed against a database; the SQL is
 * written to standard output.
 *
 * Passing 'yaseo' as the first command-line argument selects the alternate
 * host list.
 *
 * @package MediaWiki
 * @subpackage Maintenance
 */

/**
 * Run a shell command that prints a password and return that password,
 * ready to be placed between single quotes in a SQL statement.
 *
 * Trailing line breaks are stripped so they do not end up inside the
 * password literal, and quotes/backslashes are escaped.
 *
 * @param string $command Command to execute
 * @param bool $warnIfEmpty Emit a warning when the command yields nothing
 * @return string
 */
function ourusersReadPassword( $command, $warnIfEmpty = true ) {
	$password = rtrim( (string)shell_exec( $command ), "\r\n" );
	if( $password === '' && $warnIfEmpty ) {
		trigger_error( "'$command' returned an empty password", E_USER_WARNING );
	}
	return addslashes( $password );
}

/** */
$wikiuser_pass = ourusersReadPassword( 'wikiuser_pass' );
$wikiadmin_pass = ourusersReadPassword( 'wikiadmin_pass' );
# Only referenced by the commented-out wikisql section below, so stay quiet
$wikisql_pass = ourusersReadPassword( 'wikisql_pass', false );

if ( isset( $argv[1] ) && $argv[1] == 'yaseo' ) {
	$hosts = array(
		'localhost',
		'211.115.107.158',
		'211.115.107.159',
		'211.115.107.160',
		'211.115.107.138',
		'211.115.107.139',
		'211.115.107.140',
		'211.115.107.141',
		'211.115.107.142',
		'211.115.107.143',
		'211.115.107.144',
		'211.115.107.145',
		'211.115.107.146',
		'211.115.107.147',
		'211.115.107.148',
		'211.115.107.149',
		'211.115.107.150',
		'211.115.107.152',
		'211.115.107.153',
		'211.115.107.154',
		'211.115.107.155',
		'211.115.107.156',
		'211.115.107.157',
	);
} else {
	$hosts = array(
		'localhost',
		'207.142.131.194',
		'207.142.131.195',
		'207.142.131.196',
		'207.142.131.197',
		'207.142.131.198',
		'207.142.131.199',
		'207.142.131.221',
		'207.142.131.226',
		'207.142.131.227',
		'207.142.131.228',
		'207.142.131.229',
		'207.142.131.230',
		'207.142.131.231',
		'207.142.131.232',
		'207.142.131.233',
		'207.142.131.234',
		'207.142.131.237',
		'207.142.131.238',
		'207.142.131.239',
		'207.142.131.243',
		'207.142.131.244',
		'207.142.131.249',
		'207.142.131.250',
		'207.142.131.216',
		'10.0.%',
	);
}

# Database name patterns the per-database grants are issued for
$databases = array(
	'%wikibooks',
	'%wiki',
	'%wikiquote',
	'%wiktionary',
	'%wikisource',
	'%wikinews',
	'%wikiversity',
	'%wikimedia',
);

foreach( $hosts as $host ) {
	print "--\n-- $host\n--\n\n-- wikiuser\n\n";
	print "GRANT REPLICATION CLIENT,PROCESS ON *.* TO 'wikiuser'@'$host' IDENTIFIED BY '$wikiuser_pass';\n";
	print "GRANT ALL PRIVILEGES ON `boardvote`.* TO 'wikiuser'@'$host' IDENTIFIED BY '$wikiuser_pass';\n";
	print "GRANT ALL PRIVILEGES ON `boardvote2005`.* TO 'wikiuser'@'$host' IDENTIFIED BY '$wikiuser_pass';\n";
	foreach( $databases as $db ) {
		print "GRANT SELECT, INSERT, UPDATE, DELETE ON `$db`.* TO 'wikiuser'@'$host' IDENTIFIED BY '$wikiuser_pass';\n";
	}

/*
	print "\n-- wikisql\n\n";
	foreach ( $databases as $db ) {
print <<<EOS
GRANT SELECT ON `$db`.`old` TO 'wikisql'@'$host' IDENTIFIED BY '$wikisql_pass';
GRANT SELECT ON `$db`.`imagelinks` TO 'wikisql'@'$host' IDENTIFIED BY '$wikisql_pass';
GRANT SELECT ON `$db`.`image` TO 'wikisql'@'$host' IDENTIFIED BY '$wikisql_pass';
GRANT SELECT ON `$db`.`watchlist` TO 'wikisql'@'$host' IDENTIFIED BY '$wikisql_pass';
GRANT SELECT ON `$db`.`site_stats` TO 'wikisql'@'$host' IDENTIFIED BY '$wikisql_pass';
GRANT SELECT ON `$db`.`archive` TO 'wikisql'@'$host' IDENTIFIED BY '$wikisql_pass';
GRANT SELECT ON `$db`.`links` TO 'wikisql'@'$host' IDENTIFIED BY '$wikisql_pass';
GRANT SELECT ON `$db`.`ipblocks` TO 'wikisql'@'$host' IDENTIFIED BY '$wikisql_pass';
GRANT SELECT ON `$db`.`cur` TO 'wikisql'@'$host' IDENTIFIED BY '$wikisql_pass';
GRANT SELECT (user_rights, user_id, user_name, user_options) ON `$db`.`user` TO 'wikisql'@'$host' IDENTIFIED BY '$wikisql_pass';
GRANT SELECT ON `$db`.`oldimage` TO 'wikisql'@'$host' IDENTIFIED BY '$wikisql_pass';
GRANT SELECT ON `$db`.`recentchanges` TO 'wikisql'@'$host' IDENTIFIED BY '$wikisql_pass';
GRANT SELECT ON `$db`.`math` TO 'wikisql'@'$host' IDENTIFIED BY '$wikisql_pass';
GRANT SELECT ON `$db`.`brokenlinks` TO 'wikisql'@'$host' IDENTIFIED BY '$wikisql_pass';

EOS;
	}*/

	print "\n-- wikiadmin\n\n";
	print "GRANT PROCESS, REPLICATION CLIENT ON *.* TO 'wikiadmin'@'$host' IDENTIFIED BY '$wikiadmin_pass';\n";
	print "GRANT ALL PRIVILEGES ON `boardvote`.* TO wikiadmin@'$host' IDENTIFIED BY '$wikiadmin_pass';\n";
	print "GRANT ALL PRIVILEGES ON `boardvote2005`.* TO wikiadmin@'$host' IDENTIFIED BY '$wikiadmin_pass';\n";
	foreach ( $databases as $db ) {
		print "GRANT ALL PRIVILEGES ON `$db`.* TO wikiadmin@'$host' IDENTIFIED BY '$wikiadmin_pass';\n";
	}
	print "\n";
}
?>
diff --git a/maintenance/parserTests.inc b/maintenance/parserTests.inc
new file mode 100644
index 00000000..9f93c4ac
--- /dev/null
+++ b/maintenance/parserTests.inc
@@ -0,0 +1,791 @@
<?php
# Copyright (C) 2004 Brion Vibber <brion@pobox.com>
# http://www.mediawiki.org/
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
# http://www.gnu.org/copyleft/gpl.html

/**
 * @todo Make this more independent of the configuration (and if possible the database)
 * @todo document
 * @package MediaWiki
 * @subpackage Maintenance
 */

/** */
$options = array( 'quick', 'color', 'quiet', 'help' );
$optionsWithArgs = array( 'regex' );

require_once( 'commandLine.inc' );
require_once( "$IP/includes/ObjectCache.php" );
require_once( "$IP/includes/BagOStuff.php" );
require_once( "$IP/languages/LanguageUtf8.php" );
require_once( "$IP/includes/Hooks.php" );
require_once( "$IP/maintenance/parserTestsParserHook.php" );
require_once( "$IP/maintenance/parserTestsStaticParserHook.php" );
require_once( "$IP/maintenance/parserTestsParserTime.php" );

/**
 * Runs parser test cases read from a text file and reports the results
 * on standard output.
 *
 * @package MediaWiki
 * @subpackage Maintenance
 */
class ParserTest {
	/**
	 * boolean $color whether output should be colorized
	 * @private
	 */
	var $color;

	/**
	 * boolean $lightcolor whether output should use light colors
	 * @private
	 */
	var $lightcolor;

	/**
	 * boolean $showDiffs whether a diff is printed for failed tests
	 * @private
	 */
	var $showDiffs;

	/**
	 * boolean $quiet whether passed tests are reported
	 * @private
	 */
	var $quiet;

	/**
	 * string $regex pattern a test description must match for the test to run
	 * @private
	 */
	var $regex;

	/**
	 * array $hooks tag name => callback, installed on every test parser
	 * @private
	 */
	var $hooks;

	/**
	 * array $savedGlobals global name => value saved by setupGlobals()
	 * @private
	 */
	var $savedGlobals;

	/**
	 * array $oldTableNames table => prefixed name before the prefix is switched
	 * @private
	 */
	var $oldTableNames;

	/**
	 * string $mXmlError description of the last well-formedness failure
	 * @private
	 */
	var $mXmlError;

	/**
	 * Sets terminal colorization and diff/quick modes depending on OS and
	 * command-line options (--color and --quick).
	 *
	 * @public
	 */
	function ParserTest() {
		global $options;

		# Only colorize output if stdout is a terminal.
		$this->lightcolor = false;
		$this->color = !wfIsWindows() && posix_isatty(1);

		if( isset( $options['color'] ) ) {
			switch( $options['color'] ) {
			case 'no':
				$this->color = false;
				break;
			case 'light':
				$this->lightcolor = true;
				# Fall through
			case 'yes':
			default:
				$this->color = true;
				break;
			}
		}

		$this->showDiffs = !isset( $options['quick'] );

		$this->quiet = isset( $options['quiet'] );

		if (isset($options['regex'])) {
			$this->regex = $options['regex'];
		} else {
			# Matches anything
			$this->regex = '';
		}

		$this->hooks = array();
	}

	/**
	 * Remove last character if it is a newline
	 * @private
	 */
	function chomp($s) {
		if (substr($s, -1) === "\n") {
			return substr($s, 0, -1);
		}
		else {
			return $s;
		}
	}

	/**
	 * Run a series of tests listed in the given text file.
	 * Each test consists of a brief description, wikitext input,
	 * and the expected HTML output.
	 *
	 * Prints status updates on stdout and counts up the total
	 * number and percentage of passed tests.
	 *
	 * @param string $filename
	 * @return bool True if passed all tests, false if any tests failed.
	 * @public
	 */
	function runTestsFromFile( $filename ) {
		$infile = fopen( $filename, 'rt' );
		if( !$infile ) {
			wfDie( "Couldn't open $filename\n" );
		}

		$data = array();
		$section = null;
		$success = 0;
		$total = 0;
		$n = 0;
		while( false !== ($line = fgets( $infile ) ) ) {
			$n++;
			if( preg_match( '/^!!\s*(\w+)/', $line, $matches ) ) {
				$section = strtolower( $matches[1] );
				if( $section == 'endarticle') {
					if( !isset( $data['text'] ) ) {
						wfDie( "'endarticle' without 'text' at line $n\n" );
					}
					if( !isset( $data['article'] ) ) {
						wfDie( "'endarticle' without 'article' at line $n\n" );
					}
					$this->addArticle($this->chomp($data['article']), $this->chomp($data['text']), $n);
					$data = array();
					$section = null;
					continue;
				}
				if( $section == 'endhooks' ) {
					if( !isset( $data['hooks'] ) ) {
						wfDie( "'endhooks' without 'hooks' at line $n\n" );
					}
					# Use a separate variable so the line being read is not clobbered
					foreach( explode( "\n", $data['hooks'] ) as $hookName ) {
						$hookName = trim( $hookName );
						if( $hookName ) {
							$this->requireHook( $hookName );
						}
					}
					$data = array();
					$section = null;
					continue;
				}
				if( $section == 'end' ) {
					if( !isset( $data['test'] ) ) {
						wfDie( "'end' without 'test' at line $n\n" );
					}
					if( !isset( $data['input'] ) ) {
						wfDie( "'end' without 'input' at line $n\n" );
					}
					if( !isset( $data['result'] ) ) {
						wfDie( "'end' without 'result' at line $n\n" );
					}
					if( !isset( $data['options'] ) ) {
						$data['options'] = '';
					}
					else {
						$data['options'] = $this->chomp( $data['options'] );
					}
					if (preg_match('/\\bdisabled\\b/i', $data['options'])
						|| !preg_match("/{$this->regex}/i", $data['test'])) {
						# disabled test, or description not matching --regex
						$data = array();
						$section = null;
						continue;
					}
					if( $this->runTest(
						$this->chomp( $data['test'] ),
						$this->chomp( $data['input'] ),
						$this->chomp( $data['result'] ),
						$this->chomp( $data['options'] ) ) ) {
						$success++;
					}
					$total++;
					$data = array();
					$section = null;
					continue;
				}
				if ( isset ($data[$section] ) ) {
					wfDie( "duplicate section '$section' at line $n\n" );
				}
				$data[$section] = '';
				continue;
			}
			if( $section ) {
				$data[$section] .= $line;
			}
		}
		# The original never closed the handle
		fclose( $infile );

		if( $total > 0 ) {
			$ratio = wfPercent( 100 * $success / $total );
			print $this->termColor( 1 ) . "\nPassed $success of $total tests ($ratio) ";
			if( $success == $total ) {
				print $this->termColor( 32 ) . "PASSED!";
			} else {
				print $this->termColor( 31 ) . "FAILED!";
			}
			print $this->termReset() . "\n";
			return ($success == $total);
		} else {
			wfDie( "No tests found.\n" );
		}
	}

	/**
	 * Run a given wikitext input through a freshly-constructed wiki parser,
	 * and compare the output against the expected results.
	 * Prints status and explanatory messages to stdout.
	 *
	 * Words in $opts select what is run: 'pst' (pre-save transform),
	 * 'msg' (message transform), 'section=N', 'replace=N,"text"', or by
	 * default a full parse, where 'ill' and 'cat' compare the language
	 * links or category links instead of the rendered text. 'math',
	 * 'noxml' and 'title=[[...]]' are also recognized here.
	 *
	 * @param string $desc Test description, used in status messages
	 * @param string $input Wikitext to try rendering
	 * @param string $result Result to output
	 * @param string $opts Option string of the test case
	 * @return bool
	 */
	function runTest( $desc, $input, $result, $opts ) {
		if( !$this->quiet ) {
			$this->showTesting( $desc );
		}

		$this->setupGlobals($opts);

		# Plain assignment: "=& new" is deprecated and the object handle is
		# the same either way.
		$user = new User();
		$options = ParserOptions::newFromUser( $user );

		if (preg_match('/\\bmath\\b/i', $opts)) {
			# XXX this should probably be done by the ParserOptions
			$options->setUseTex(true);
		}

		if (preg_match('/title=\[\[(.*)\]\]/', $opts, $m)) {
			$titleText = $m[1];
		}
		else {
			$titleText = 'Parser test';
		}

		$noxml = (bool)preg_match( '~\\b noxml \\b~x', $opts );

		$parser = new Parser();
		foreach( $this->hooks as $tag => $callback ) {
			$parser->setHook( $tag, $callback );
		}
		wfRunHooks( 'ParserTestParser', array( &$parser ) );

		$title = Title::makeTitle( NS_MAIN, $titleText );

		if (preg_match('/\\bpst\\b/i', $opts)) {
			$out = $parser->preSaveTransform( $input, $title, $user, $options );
		} elseif (preg_match('/\\bmsg\\b/i', $opts)) {
			$out = $parser->transformMsg( $input, $options );
		} elseif( preg_match( '/\\bsection=(\d+)\b/i', $opts, $matches ) ) {
			$section = intval( $matches[1] );
			$out = $parser->getSection( $input, $section );
		} elseif( preg_match( '/\\breplace=(\d+),"(.*?)"/i', $opts, $matches ) ) {
			$section = intval( $matches[1] );
			$replace = $matches[2];
			$out = $parser->replaceSection( $input, $section, $replace );
		} else {
			$output = $parser->parse( $input, $title, $options, true, true, 1337 );
			$out = $output->getText();

			if (preg_match('/\\bill\\b/i', $opts)) {
				$out = $this->tidy( implode( ' ', $output->getLanguageLinks() ) );
			} else if (preg_match('/\\bcat\\b/i', $opts)) {
				global $wgOut;
				$wgOut->addCategoryLinks($output->getCategories());
				$out = $this->tidy ( implode( ' ', $wgOut->getCategoryLinks() ) );
			}

			$result = $this->tidy($result);
		}

		$this->teardownGlobals();

		if( $result === $out && ( $noxml === true || $this->wellFormed( $out ) ) ) {
			return $this->showSuccess( $desc );
		} else {
			return $this->showFailure( $desc, $result, $out );
		}
	}

	/**
	 * Set up the global variables for a consistent environment for each test.
	 * Ideally this should replace the global configuration entirely.
	 *
	 * The overwritten values are kept in $this->savedGlobals so that
	 * teardownGlobals() can put them back.
	 *
	 * @param string $opts Option string of the test case
	 * @private
	 */
	function setupGlobals($opts = '') {
		# Save the prefixed / quoted table names for later use when we make the temporaries.
		$db =& wfGetDB( DB_READ );
		$this->oldTableNames = array();
		foreach( $this->listTables() as $table ) {
			$this->oldTableNames[$table] = $db->tableName( $table );
		}
		if( !isset( $this->uploadDir ) ) {
			$this->uploadDir = $this->setupUploadDir();
		}

		if( preg_match( '/language=([a-z]+(?:_[a-z]+)?)/', $opts, $m ) ) {
			$lang = $m[1];
		} else {
			$lang = 'en';
		}

		# 'wgDefaultUserOptions' was listed twice with the same value in the
		# original array; the redundant second entry is dropped.
		$settings = array(
			'wgServer' => 'http://localhost',
			'wgScript' => '/index.php',
			'wgScriptPath' => '/',
			'wgArticlePath' => '/wiki/$1',
			'wgActionPaths' => array(),
			'wgUploadPath' => 'http://example.com/images',
			'wgUploadDirectory' => $this->uploadDir,
			'wgStyleSheetPath' => '/skins',
			'wgSitename' => 'MediaWiki',
			'wgServerName' => 'Britney Spears',
			'wgLanguageCode' => $lang,
			'wgContLanguageCode' => $lang,
			'wgDBprefix' => 'parsertest_',
			'wgDefaultUserOptions' => array(),

			'wgLang' => null,
			'wgContLang' => null,
			'wgNamespacesWithSubpages' => array( 0 => preg_match('/\\bsubpage\\b/i', $opts)),
			'wgMaxTocLevel' => 999,
			'wgCapitalLinks' => true,
			'wgNoFollowLinks' => true,
			'wgThumbnailScriptPath' => false,
			'wgUseTeX' => false,
			'wgLocaltimezone' => 'UTC',
			'wgAllowExternalImages' => true,
			);
		$this->savedGlobals = array();
		foreach( $settings as $var => $val ) {
			$this->savedGlobals[$var] = $GLOBALS[$var];
			$GLOBALS[$var] = $val;
		}
		$langClass = 'Language' . str_replace( '-', '_', ucfirst( $lang ) );
		$langObj = setupLangObj( $langClass );
		$GLOBALS['wgLang'] = $langObj;
		$GLOBALS['wgContLang'] = $langObj;

		$GLOBALS['wgLoadBalancer']->loadMasterPos();
		$GLOBALS['wgMessageCache'] = new MessageCache( new BagOStuff(), false, 0, $GLOBALS['wgDBname'] );
		$this->setupDatabase();

		global $wgUser;
		$wgUser = new User();
	}

	/**
	 * List of temporary tables to create, without prefix.
	 * Some of these probably aren't necessary.
	 *
	 * @return array
	 */
	function listTables() {
		$tables = array('user', 'page', 'revision', 'text',
			'pagelinks', 'imagelinks', 'categorylinks',
			'templatelinks', 'externallinks', 'langlinks',
			'site_stats', 'hitcounter',
			'ipblocks', 'image', 'oldimage',
			'recentchanges',
			'watchlist', 'math', 'searchindex',
			'interwiki', 'querycache',
			'objectcache', 'job'
		);

		// FIXME manually adding additional table for the tasks extension
		// we probably need a better software wide system to register new
		// tables.
		global $wgExtensionFunctions;
		if( in_array('wfTasksExtension' , $wgExtensionFunctions ) ) {
			$tables[] = 'tasks';
		}

		return $tables;
	}

	/**
	 * Set up a temporary set of wiki tables to work with for the tests.
	 * Currently this will only be done once per run, and any changes to
	 * the db will be visible to later tests in the run.
	 *
	 * @private
	 */
	function setupDatabase() {
		static $setupDB = false;
		global $wgDBprefix;

		# Make sure we don't mess with the live DB
		if (!$setupDB && $wgDBprefix === 'parsertest_') {
			# oh teh horror
			$GLOBALS['wgLoadBalancer'] = LoadBalancer::newFromParams( $GLOBALS['wgDBservers'] );
			$db =& wfGetDB( DB_MASTER );

			$tables = $this->listTables();

			# Compare as version numbers: a plain string comparison would
			# sort e.g. '10.0' before '4.1'.
			$isOldMySQL = version_compare( $db->getServerVersion(), '4.1', '<' )
				&& stristr( $db->getSoftwareLink(), 'MySQL' );

			if ( !$isOldMySQL ) {
				# Database that supports CREATE TABLE ... LIKE
				global $wgDBtype;
				if( $wgDBtype == 'postgres' ) {
					$def = 'INCLUDING DEFAULTS';
				} else {
					$def = '';
				}
				foreach ($tables as $tbl) {
					$newTableName = $db->tableName( $tbl );
					$tableName = $this->oldTableNames[$tbl];
					$db->query("CREATE TEMPORARY TABLE $newTableName (LIKE $tableName $def)");
				}
			} else {
				# Hack for MySQL versions < 4.1, which don't support
				# "CREATE TABLE ... LIKE". Note that
				# "CREATE TEMPORARY TABLE ... SELECT * FROM ... LIMIT 0"
				# would not create the indexes we need....
				foreach ($tables as $tbl) {
					$res = $db->query("SHOW CREATE TABLE {$this->oldTableNames[$tbl]}");
					$row = $db->fetchRow($res);
					$create = $row[1];
					$create_tmp = preg_replace('/CREATE TABLE `(.*?)`/', 'CREATE TEMPORARY TABLE `'
						. $wgDBprefix . $tbl .'`', $create);
					if ($create === $create_tmp) {
						# Couldn't do replacement
						wfDie("could not create temporary table $tbl");
					}
					$db->query($create_tmp);
				}

			}

			# Hack: insert a few Wikipedia in-project interwiki prefixes,
			# for testing inter-language links
			$db->insert( 'interwiki', array(
				array( 'iw_prefix' => 'Wikipedia',
				       'iw_url'    => 'http://en.wikipedia.org/wiki/$1',
				       'iw_local'  => 0 ),
				array( 'iw_prefix' => 'MeatBall',
				       'iw_url'    => 'http://www.usemod.com/cgi-bin/mb.pl?$1',
				       'iw_local'  => 0 ),
				array( 'iw_prefix' => 'zh',
				       'iw_url'    => 'http://zh.wikipedia.org/wiki/$1',
				       'iw_local'  => 1 ),
				array( 'iw_prefix' => 'es',
				       'iw_url'    => 'http://es.wikipedia.org/wiki/$1',
				       'iw_local'  => 1 ),
				array( 'iw_prefix' => 'fr',
				       'iw_url'    => 'http://fr.wikipedia.org/wiki/$1',
				       'iw_local'  => 1 ),
				array( 'iw_prefix' => 'ru',
				       'iw_url'    => 'http://ru.wikipedia.org/wiki/$1',
				       'iw_local'  => 1 ),
				) );

			# Hack: Insert an image to work with
			$db->insert( 'image', array(
				'img_name'        => 'Foobar.jpg',
				'img_size'        => 12345,
				'img_description' => 'Some lame file',
				'img_user'        => 1,
				'img_user_text'   => 'WikiSysop',
				'img_timestamp'   => $db->timestamp( '20010115123500' ),
				'img_width'       => 1941,
				'img_height'      => 220,
				'img_bits'        => 24,
				'img_media_type'  => MEDIATYPE_BITMAP,
				'img_major_mime'  => "image",
				'img_minor_mime'  => "jpeg",
				) );

			# Update certain things in site_stats
			$db->insert( 'site_stats', array( 'ss_row_id' => 1, 'ss_images' => 1, 'ss_good_articles' => 1 ) );

			$setupDB = true;
		}
	}

	/**
	 * Create a dummy uploads directory which will contain a couple
	 * of files in order to pass existence tests.
	 * @return string The directory
	 * @private
	 */
	function setupUploadDir() {
		global $IP;

		$dir = wfTempDir() . "/mwParser-" . mt_rand() . "-images";
		mkdir( $dir );
		mkdir( $dir . '/3' );
		mkdir( $dir . '/3/3a' );

		# Any existing image will do as the body of the dummy Foobar.jpg
		copy( "$IP/skins/monobook/headbg.jpg", $dir . '/3/3a/Foobar.jpg' );
		return $dir;
	}

	/**
	 * Restore default values and perform any necessary clean-up
	 * after each test runs.
	 *
	 * @private
	 */
	function teardownGlobals() {
		foreach( $this->savedGlobals as $var => $val ) {
			$GLOBALS[$var] = $val;
		}
		if( isset( $this->uploadDir ) ) {
			$this->teardownUploadDir( $this->uploadDir );
			unset( $this->uploadDir );
		}
	}

	/**
	 * Remove the dummy uploads directory
	 * @param string $dir Directory returned by setupUploadDir()
	 * @private
	 */
	function teardownUploadDir( $dir ) {
		unlink( "$dir/3/3a/Foobar.jpg" );
		rmdir( "$dir/3/3a" );
		rmdir( "$dir/3" );
		# Thumbnail paths may or may not exist, hence the error suppression
		@rmdir( "$dir/thumb/6/65" );
		@rmdir( "$dir/thumb/6" );

		@unlink( "$dir/thumb/3/3a/Foobar.jpg/180px-Foobar.jpg" );
		@rmdir( "$dir/thumb/3/3a/Foobar.jpg" );
		@rmdir( "$dir/thumb/3/3a" );
		@rmdir( "$dir/thumb/3/39" ); # wtf?
		@rmdir( "$dir/thumb/3" );
		@rmdir( "$dir/thumb" );
		@rmdir( "$dir" );
	}

	/**
	 * "Running test $desc..."
	 * @param string $desc The test name
	 * @private
	 */
	function showTesting( $desc ) {
		print "Running test $desc... ";
	}

	/**
	 * Print a happy success message.
	 *
	 * @param string $desc The test name
	 * @return bool
	 * @private
	 */
	function showSuccess( $desc ) {
		if( !$this->quiet ) {
			print $this->termColor( '1;32' ) . 'PASSED' . $this->termReset() . "\n";
		}
		return true;
	}

	/**
	 * Print a failure message and provide some explanatory output
	 * about what went wrong if so configured.
	 *
	 * @param string $desc The test name
	 * @param string $result Expected HTML output
	 * @param string $html Actual HTML output
	 * @return bool
	 * @private
	 */
	function showFailure( $desc, $result, $html ) {
		if( $this->quiet ) {
			# In quiet mode we didn't show the 'Testing' message before the
			# test, in case it succeeded. Show it now:
			$this->showTesting( $desc );
		}
		print $this->termColor( '1;31' ) . 'FAILED!' . $this->termReset() . "\n";
		if( $this->showDiffs ) {
			print $this->quickDiff( $result, $html );
			if( !$this->wellFormed( $html ) ) {
				print "XML error: $this->mXmlError\n";
			}
		}
		return false;
	}

	/**
	 * Run given strings through a diff and return the (colorized) output.
	 * Requires writable /tmp directory and a 'diff' command in the PATH.
	 *
	 * @param string $input
	 * @param string $output
	 * @param string $inFileTail Tailing for the input file name
	 * @param string $outFileTail Tailing for the output file name
	 * @return string
	 * @private
	 */
	function quickDiff( $input, $output, $inFileTail='expected', $outFileTail='actual' ) {
		$prefix = wfTempDir() . "/mwParser-" . mt_rand();

		$infile = "$prefix-$inFileTail";
		$this->dumpToFile( $input, $infile );

		$outfile = "$prefix-$outFileTail";
		$this->dumpToFile( $output, $outfile );

		# Quote the paths so a temp directory containing spaces or shell
		# metacharacters cannot break the command line.
		$diff = shell_exec( 'diff -au ' . escapeshellarg( $infile ) . ' ' . escapeshellarg( $outfile ) );
		unlink( $infile );
		unlink( $outfile );

		return $this->colorDiff( $diff );
	}

	/**
	 * Write the given string to a file, adding a final newline.
	 *
	 * @param string $data
	 * @param string $filename
	 * @private
	 */
	function dumpToFile( $data, $filename ) {
		$file = fopen( $filename, "wt" );
		fwrite( $file, $data . "\n" );
		fclose( $file );
	}

	/**
	 * Return ANSI terminal escape code for changing text attribs/color,
	 * or empty string if color output is disabled.
	 *
	 * @param string $color Semicolon-separated list of attribute/color codes
	 * @return string
	 * @private
	 */
	function termColor( $color ) {
		if($this->lightcolor) {
			return $this->color ? "\x1b[1;{$color}m" : '';
		} else {
			return $this->color ? "\x1b[{$color}m" : '';
		}
	}

	/**
	 * Return ANSI terminal escape code for restoring default text attributes,
	 * or empty string if color output is disabled.
	 *
	 * @return string
	 * @private
	 */
	function termReset() {
		return $this->color ? "\x1b[0m" : '';
	}

	/**
	 * Colorize unified diff output if set for ANSI color output.
	 * Subtractions are colored blue, additions red.
	 *
	 * @param string $text
	 * @return string
	 * @private
	 */
	function colorDiff( $text ) {
		return preg_replace(
			array( '/^(-.*)$/m', '/^(\+.*)$/m' ),
			array( $this->termColor( 34 ) . '$1' . $this->termReset(),
			       $this->termColor( 31 ) . '$1' . $this->termReset() ),
			$text );
	}

	/**
	 * Insert a temporary test article
	 * @param string $name the title, including any prefix
	 * @param string $text the article text
	 * @param int $line the input line number, for reporting errors
	 * @private
	 */
	function addArticle($name, $text, $line) {
		$this->setupGlobals();
		$title = Title::newFromText( $name );
		if ( is_null($title) ) {
			wfDie( "invalid title at line $line\n" );
		}

		$aid = $title->getArticleID( GAID_FOR_UPDATE );
		if ($aid != 0) {
			wfDie( "duplicate article at line $line\n" );
		}

		$art = new Article($title);
		$art->insertNewArticle($text, '', false, false );
		$this->teardownGlobals();
	}

	/**
	 * Steal a callback function from the primary parser, save it for
	 * application to our scary parser. If the hook is not installed,
	 * die a painful death to warn the others.
	 * @param string $name
	 */
	private function requireHook( $name ) {
		global $wgParser;
		if( isset( $wgParser->mTagHooks[$name] ) ) {
			$this->hooks[$name] = $wgParser->mTagHooks[$name];
		} else {
			wfDie( "This test suite requires the '$name' hook extension.\n" );
		}
	}

	/**
	 * Run the "tidy" command on text if the $wgUseTidy
	 * global is true
	 *
	 * @param string $text the text to tidy
	 * @return string
	 * @static
	 * @private
	 */
	function tidy( $text ) {
		global $wgUseTidy;
		if ($wgUseTidy) {
			$text = Parser::tidy($text);
		}
		return $text;
	}

	/**
	 * Check whether the text, wrapped in an <html> element behind the
	 * Sanitizer's doctype hack, parses as XML. On failure a description
	 * of the error is left in $this->mXmlError.
	 *
	 * @param string $text
	 * @return bool
	 */
	function wellFormed( $text ) {
		$html =
			Sanitizer::hackDocType() .
			'<html>' .
			$text .
			'</html>';

		$parser = xml_parser_create( "UTF-8" );

		# case folding violates XML standard, turn it off
		xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING, false );

		if( !xml_parse( $parser, $html, true ) ) {
			$err = xml_error_string( xml_get_error_code( $parser ) );
			$position = xml_get_current_byte_index( $parser );
			$fragment = $this->extractFragment( $html, $position );
			$this->mXmlError = "$err at byte $position:\n$fragment";
			xml_parser_free( $parser );
			return false;
		}
		xml_parser_free( $parser );
		return true;
	}

	/**
	 * Build a two-line excerpt of the text around a byte position: up to
	 * ten bytes before it, the byte itself highlighted, nine bytes after,
	 * and below that a caret pointing at the position.
	 *
	 * @param string $text
	 * @param int $position Byte offset into $text
	 * @return string
	 */
	function extractFragment( $text, $position ) {
		$start = max( 0, $position - 10 );
		$before = $position - $start;
		$fragment = '...' .
			$this->termColor( 34 ) .
			substr( $text, $start, $before ) .
			$this->termColor( 0 ) .
			$this->termColor( 31 ) .
			$this->termColor( 1 ) .
			substr( $text, $position, 1 ) .
			$this->termColor( 0 ) .
			$this->termColor( 34 ) .
			substr( $text, $position + 1, 9 ) .
			$this->termColor( 0 ) .
			'...';
		$display = str_replace( "\n", ' ', $fragment );
		$caret = '   ' .
			str_repeat( ' ', $before ) .
			$this->termColor( 31 ) .
			'^' .
			$this->termColor( 0 );
		return "$display\n$caret";
	}

}

?>
diff --git a/maintenance/parserTests.php b/maintenance/parserTests.php
new file mode 100644
index 00000000..eac7adb0
--- /dev/null
+++ b/maintenance/parserTests.php
@@ -0,0 +1,64 @@
<?php
# Copyright (C) 2004 Brion Vibber <brion@pobox.com>
# http://www.mediawiki.org/
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
# http://www.gnu.org/copyleft/gpl.html

/**
 * Command-line entry point for the parser test suite: prints usage for
 * --help, otherwise runs the test cases from --file (or the bundled
 * parserTests.txt) through ParserTest and exits non-zero on failure.
 *
 * @package MediaWiki
 * @subpackage Maintenance
 */

/** */
require('parserTests.inc');

if( isset( $options['help'] ) ) {
	echo <<<END
MediaWiki $wgVersion parser test suite
Usage: php parserTests.php [--quick] [--quiet] [--color[=(yes|no|light)]]
                           [--regex=<expression>] [--file=<testfile>]
                           [--help]
Options:
  --quick  Suppress diff output of failed tests
  --quiet  Suppress notification of passed tests (shows only failed tests)
  --color  Override terminal detection and force color output on or off
           'light' option is similar to 'yes' but with color for dark backgrounds
  --regex  Only run tests whose descriptions which match given regex
  --file   Run test cases from a custom file instead of parserTests.txt
  --help   Show this help message


END;
	exit( 0 );
}

# There is a convention that the parser should never
# refer to $wgTitle directly, but instead use the title
# passed to it.
$wgTitle = Title::newFromText( 'Parser test script do not use' );
# Plain assignment: objects are already handled by reference in PHP 5, and
# "=& new" is deprecated there and a parse error in later PHP versions.
$tester = new ParserTest();

if( isset( $options['file'] ) ) {
	$file = $options['file'];
} else {
	# Note: the command line setup changes the current working directory
	# to the parent, which is why we have to put the subdir here:
	$file = $IP.'/maintenance/parserTests.txt';
}
$ok = $tester->runTestsFromFile( $file );

# Zero exit status when the run succeeded, non-zero otherwise.
exit ($ok ? 0 : -1);
?>
diff --git a/maintenance/parserTests.txt b/maintenance/parserTests.txt new file mode 100644 index 00000000..0238051c --- /dev/null +++ b/maintenance/parserTests.txt @@ -0,0 +1,5475 @@ +# MediaWiki Parser test cases +# Some taken from http://meta.wikimedia.org/wiki/Parser_testing +# All (C) their respective authors and released under the GPL +# +# The syntax should be fairly self-explanatory. +# +# Currently supported test options: +# One of the following three: +# +# (default) generate HTML output +# pst apply pre-save transform +# msg apply message transform +# +# Plus any combination of these: +# +# cat add category links +# ill add inter-language links +# subpage enable subpages (disabled by default) +# noxml don't check for XML well formdness +# title=[[XXX]] run test using article title XXX +# language=XXX set content language to XXX for this test +# disabled do not run test +# +# For testing purposes, temporary articles can created: +# !!article / NAMESPACE:TITLE / !!text / ARTICLE TEXT / !!endarticle +# where '/' denotes a newline. + +# This is the standard article assumed to exist. +!! article +Main Page +!! text +blah blah +!! endarticle + +### +### Basic tests +### +!! test +Blank input +!! input +!! result +!! end + + +!! test +Simple paragraph +!! input +This is a simple paragraph. +!! result +<p>This is a simple paragraph. +</p> +!! end + +!! test +Simple list +!! input +* Item 1 +* Item 2 +!! result +<ul><li> Item 1 +</li><li> Item 2 +</li></ul> + +!! end + +!! test +Italics and bold +!!
input +* plain +* plain''italic''plain +* plain''italic''plain''italic''plain +* plain'''bold'''plain +* plain'''bold'''plain'''bold'''plain +* plain''italic''plain'''bold'''plain +* plain'''bold'''plain''italic''plain +* plain''italic'''bold-italic'''italic''plain +* plain'''bold''bold-italic''bold'''plain +* plain'''''bold-italic'''italic''plain +* plain'''''bold-italic''bold'''plain +* plain''italic'''bold-italic'''''plain +* plain'''bold''bold-italic'''''plain +* plain l'''italic''plain +!! result +<ul><li> plain +</li><li> plain<i>italic</i>plain +</li><li> plain<i>italic</i>plain<i>italic</i>plain +</li><li> plain<b>bold</b>plain +</li><li> plain<b>bold</b>plain<b>bold</b>plain +</li><li> plain<i>italic</i>plain<b>bold</b>plain +</li><li> plain<b>bold</b>plain<i>italic</i>plain +</li><li> plain<i>italic<b>bold-italic</b>italic</i>plain +</li><li> plain<b>bold<i>bold-italic</i>bold</b>plain +</li><li> plain<i><b>bold-italic</b>italic</i>plain +</li><li> plain<b><i>bold-italic</i>bold</b>plain +</li><li> plain<i>italic<b>bold-italic</b></i>plain +</li><li> plain<b>bold<i>bold-italic</i></b>plain +</li><li> plain l'<i>italic</i>plain +</li></ul> + +!! end + +### +### <nowiki> test cases +### + +!! test +<nowiki> unordered list +!! input +<nowiki>* This is not an unordered list item.</nowiki> +!! result +<p>* This is not an unordered list item. +</p> +!! end + +!! test +<nowiki> spacing +!! input +<nowiki>Lorem ipsum dolor + +sed abit. + sed nullum. + +:and a colon +</nowiki> +!! result +<p>Lorem ipsum dolor + +sed abit. + sed nullum. + +:and a colon + +</p> +!! end + +!! test +nowiki 3 +!! input +:There is not nowiki. +:There is <nowiki>nowiki</nowiki>. + +#There is not nowiki. +#There is <nowiki>nowiki</nowiki>. + +*There is not nowiki. +*There is <nowiki>nowiki</nowiki>. +!! result +<dl><dd>There is not nowiki. +</dd><dd>There is nowiki. +</dd></dl> +<ol><li>There is not nowiki. +</li><li>There is nowiki. +</li></ol> +<ul><li>There is not nowiki. 
+</li><li>There is nowiki. +</li></ul> + +!! end + + +### +### Comments +### +!! test +Comment test 1 +!! input +<!-- comment 1 --> asdf +<!-- comment 2 --> +!! result +<pre>asdf +</pre> + +!! end + +!! test +Comment test 2 +!! input +asdf +<!-- comment 1 --> +jkl +!! result +<p>asdf +jkl +</p> +!! end + +!! test +Comment test 3 +!! input +asdf +<!-- comment 1 --> +<!-- comment 2 --> +jkl +!! result +<p>asdf +jkl +</p> +!! end + +!! test +Comment test 4 +!! input +asdf<!-- comment 1 -->jkl +!! result +<p>asdfjkl +</p> +!! end + +!! test +Comment spacing +!! input +a + <!-- foo --> b <!-- bar --> +c +!! result +<p>a +</p> +<pre> b +</pre> +<p>c +</p> +!! end + +!! test +Comment whitespace +!! input +<!-- returns a single newline, not nothing, since the newline after > is not stripped --> +!! result + +!! end + +!! test +Comment semantics and delimiters +!! input +<!-- --><!----><!-----><!------> +!! result + +!! end + +!! test +Comment semantics and delimiters, redux +!! input +<!-- In SGML every "foo" here would actually show up in the text -- foo -- bar +-- foo -- funky huh? ... --> +!! result + +!! end + +!! test +Comment semantics and delimiters: directors cut +!! input +<!-- ... However we like to keep things simple and somewhat XML-ish so we eat +everything starting with < followed by !-- until the first -- and > we see, +that wouldn't be valid XML however, since in XML -- has to terminate a comment +-->--> +!! result +<p>--> +</p> +!! end + +!! test +Comment semantics: nesting +!! input +<!--<!-- no, we're not going to do anything fancy here -->--> +!! result +<p>--> +</p> +!! end + +!! test +Comment semantics: unclosed comment at end +!! input +<!--This comment will run out to the end of the document +!! result + +!! end + + +### +### Preformatted text +### +!! test +Preformatted text +!! input + This is some + Preformatted text + With ''italic'' + And '''bold''' + And a [[Main Page|link]] +!! 
result +<pre>This is some +Preformatted text +With <i>italic</i> +And <b>bold</b> +And a <a href="/wiki/Main_Page" title="Main Page">link</a> +</pre> +!! end + +!! test +<pre> with <nowiki> inside (compatibility with 1.6 and earlier) +!! input +<pre><nowiki> +<b> +<cite> +<em> +</nowiki></pre> +!! result +<pre> +<b> +<cite> +<em> +</pre> + +!! end + +!! test +Regression with preformatted in <center> +!! input +<center> + Blah +</center> +!! result +<center> +<pre>Blah +</pre> +</center> + +!! end + +!! test +<pre> with attributes (bug 3202) +!! input +<pre style="background: blue; color:white">Bluescreen of WikiDeath</pre> +!! result +<pre style="background: blue; color:white">Bluescreen of WikiDeath</pre> + +!! end + +!! test +<pre> with width attribute (bug 3202) +!! input +<pre width="8">Narrow screen goodies</pre> +!! result +<pre width="8">Narrow screen goodies</pre> + +!! end + +!! test +<pre> with forbidden attribute (bug 3202) +!! input +<pre width="8" onmouseover="alert(document.cookie)">Narrow screen goodies</pre> +!! result +<pre width="8">Narrow screen goodies</pre> + +!! end + +!! test +<pre> with forbidden attribute values (bug 3202) +!! input +<pre width="8" style="border-width: expression(alert(document.cookie))">Narrow screen goodies</pre> +!! result +<pre width="8">Narrow screen goodies</pre> + +!! end + +### +### Definition lists +### +!! test +Simple definition +!! input +; name : Definition +!! result +<dl><dt> name </dt><dd> Definition +</dd></dl> + +!! end + +!! test +Simple definition +!! input +: Indented text +!! result +<dl><dd> Indented text +</dd></dl> + +!! end + +!! test +Definition list with no space +!! input +;name:Definition +!! result +<dl><dt>name</dt><dd>Definition +</dd></dl> + +!!end + +!! test +Definition list with URL link +!! input +; http://example.com/ : definition +!! 
result +<dl><dt> <a href="http://example.com/" class="external free" title="http://example.com/" rel="nofollow">http://example.com/</a> </dt><dd> definition +</dd></dl> + +!! end + +!! test +Definition list with bracketed URL link +!! input +;[http://www.example.com/ Example]:Something about it +!! result +<dl><dt><a href="http://www.example.com/" class="external text" title="http://www.example.com/" rel="nofollow">Example</a></dt><dd>Something about it +</dd></dl> + +!! end + +!! test +Definition list with wikilink containing colon +!! input +; [[Help:FAQ]]: The least-read page on Wikipedia +!! result +<dl><dt> <a href="/index.php?title=Help:FAQ&action=edit" class="new" title="Help:FAQ">Help:FAQ</a></dt><dd> The least-read page on Wikipedia +</dd></dl> + +!! end + +# At Brion's and JeLuF's insistence... :) +!! test +Definition list with wikilink containing colon +!! input +; news:alt.wikipedia.rox: This isn't even a real newsgroup! +!! result +<dl><dt> <a href="news:alt.wikipedia.rox" class="external free" title="news:alt.wikipedia.rox" rel="nofollow">news:alt.wikipedia.rox</a></dt><dd> This isn't even a real newsgroup! +</dd></dl> + +!! end + +!! test +Malformed definition list with colon +!! input +; news:alt.wikipedia.rox -- don't crash or enter an infinite loop +!! result +<dl><dt> <a href="news:alt.wikipedia.rox" class="external free" title="news:alt.wikipedia.rox" rel="nofollow">news:alt.wikipedia.rox</a> -- don't crash or enter an infinite loop +</dt></dl> + +!! end + +!! test +Definition lists: colon in external link text +!! input +; [http://www.wikipedia2.org/ Wikipedia : The Next Generation]: OK, I made that up +!! result +<dl><dt> <a href="http://www.wikipedia2.org/" class="external text" title="http://www.wikipedia2.org/" rel="nofollow">Wikipedia : The Next Generation</a></dt><dd> OK, I made that up +</dd></dl> + +!! end + +!! test +Definition lists: colon in HTML attribute +!! input +;<b style="display: inline">bold</b> +!! 
result +<dl><dt><b style="display: inline">bold</b> +</dt></dl> + +!! end + + +!! test +Definition lists: self-closed tag +!! input +;one<br/>two : two-line fun +!! result +<dl><dt>one<br />two </dt><dd> two-line fun +</dd></dl> + +!! end + + +### +### External links +### +!! test +External links: non-bracketed +!! input +Non-bracketed: http://example.com +!! result +<p>Non-bracketed: <a href="http://example.com" class="external free" title="http://example.com" rel="nofollow">http://example.com</a> +</p> +!! end + +!! test +External links: numbered +!! input +Numbered: [http://example.com] +Numbered: [http://example.net] +Numbered: [http://example.org] +!! result +<p>Numbered: <a href="http://example.com" class="external autonumber" title="http://example.com" rel="nofollow">[1]</a> +Numbered: <a href="http://example.net" class="external autonumber" title="http://example.net" rel="nofollow">[2]</a> +Numbered: <a href="http://example.org" class="external autonumber" title="http://example.org" rel="nofollow">[3]</a> +</p> +!!end + +!! test +External links: specified text +!! input +Specified text: [http://example.com link] +!! result +<p>Specified text: <a href="http://example.com" class="external text" title="http://example.com" rel="nofollow">link</a> +</p> +!!end + +!! test +External links: trail +!! input +Linktrails should not work for external links: [http://example.com link]s +!! result +<p>Linktrails should not work for external links: <a href="http://example.com" class="external text" title="http://example.com" rel="nofollow">link</a>s +</p> +!! end + +!! test +External links: dollar sign in URL +!! input +http://example.com/1$2345 +!! result +<p><a href="http://example.com/1$2345" class="external free" title="http://example.com/1$2345" rel="nofollow">http://example.com/1$2345</a> +</p> +!! end + +!! test +External links: dollar sign in URL (named) +!! input +[http://example.com/1$2345] +!! 
result +<p><a href="http://example.com/1$2345" class="external autonumber" title="http://example.com/1$2345" rel="nofollow">[1]</a> +</p> +!!end + +!! test +External links: open square bracket forbidden in URL (bug 4377) +!! input +http://example.com/1[2345 +!! result +<p><a href="http://example.com/1" class="external free" title="http://example.com/1" rel="nofollow">http://example.com/1</a>[2345 +</p> +!! end + +!! test +External links: open square bracket forbidden in URL (named) (bug 4377) +!! input +[http://example.com/1[2345] +!! result +<p><a href="http://example.com/1" class="external text" title="http://example.com/1" rel="nofollow">[2345</a> +</p> +!!end + +!! test +External links: nowiki in URL link text (bug 6230) +!!input +[http://example.com/ <nowiki>''example site''</nowiki>] +!! result +<p><a href="http://example.com/" class="external text" title="http://example.com/" rel="nofollow">''example site''</a> +</p> +!! end + +!! test +External links: newline forbidden in text (bug 6230 regression check) +!! input +[http://example.com/ first +second] +!! result +<p>[<a href="http://example.com/" class="external free" title="http://example.com/" rel="nofollow">http://example.com/</a> first +second] +</p> +!!end + +!! test +External image +!! input +External image: http://meta.wikimedia.org/upload/f/f1/Ncwikicol.png +!! result +<p>External image: <img src="http://meta.wikimedia.org/upload/f/f1/Ncwikicol.png" alt="Ncwikicol.png" /> +</p> +!! end + +!! test +External image from https +!! input +External image from https: https://meta.wikimedia.org/upload/f/f1/Ncwikicol.png +!! result +<p>External image from https: <img src="https://meta.wikimedia.org/upload/f/f1/Ncwikicol.png" alt="Ncwikicol.png" /> +</p> +!! end + +!! test +Link to non-http image, no img tag +!! input +Link to non-http image, no img tag: ftp://example.com/test.jpg +!! 
result +<p>Link to non-http image, no img tag: <a href="ftp://example.com/test.jpg" class="external free" title="ftp://example.com/test.jpg" rel="nofollow">ftp://example.com/test.jpg</a> +</p> +!! end + +!! test +External links: terminating separator +!! input +Terminating separator: http://example.com/thing, +!! result +<p>Terminating separator: <a href="http://example.com/thing" class="external free" title="http://example.com/thing" rel="nofollow">http://example.com/thing</a>, +</p> +!! end + +!! test +External links: intervening separator +!! input +Intervening separator: http://example.com/1,2,3 +!! result +<p>Intervening separator: <a href="http://example.com/1,2,3" class="external free" title="http://example.com/1,2,3" rel="nofollow">http://example.com/1,2,3</a> +</p> +!! end + +!! test +External links: old bug with URL in query +!! input +Old bug with URL in query: [http://example.com/thing?url=http://example.com link] +!! result +<p>Old bug with URL in query: <a href="http://example.com/thing?url=http://example.com" class="external text" title="http://example.com/thing?url=http://example.com" rel="nofollow">link</a> +</p> +!! end + +!! test +External links: old URL-in-URL bug, mixed protocols +!! input +And again with mixed protocols: [ftp://example.com?url=http://example.com link] +!! result +<p>And again with mixed protocols: <a href="ftp://example.com?url=http://example.com" class="external text" title="ftp://example.com?url=http://example.com" rel="nofollow">link</a> +</p> +!!end + +!! test +External links: URL in text +!! input +URL in text: [http://example.com http://example.com] +!! result +<p>URL in text: <a href="http://example.com" class="external free" title="http://example.com" rel="nofollow">http://example.com</a> +</p> +!! end + +!! test +External links: Clickable images +!! input +ja-style clickable images: [http://example.com http://meta.wikimedia.org/upload/f/f1/Ncwikicol.png] +!! 
result +<p>ja-style clickable images: <a href="http://example.com" class="external text" title="http://example.com" rel="nofollow"><img src="http://meta.wikimedia.org/upload/f/f1/Ncwikicol.png" alt="Ncwikicol.png" /></a> +</p> +!!end + +!! test +External links: raw ampersand +!! input +Old & use: http://x&y +!! result +<p>Old & use: <a href="http://x&y" class="external free" title="http://x&y" rel="nofollow">http://x&y</a> +</p> +!! end + +!! test +External links: encoded ampersand +!! input +Old & use: http://x&y +!! result +<p>Old & use: <a href="http://x&y" class="external free" title="http://x&y" rel="nofollow">http://x&y</a> +</p> +!! end + +!! test +External links: encoded equals (bug 6102) +!! input +http://example.com/?foo=bar +!! result +<p><a href="http://example.com/?foo=bar" class="external free" title="http://example.com/?foo=bar" rel="nofollow">http://example.com/?foo=bar</a> +</p> +!! end + +!! test +External links: [raw ampersand] +!! input +Old & use: [http://x&y] +!! result +<p>Old & use: <a href="http://x&y" class="external autonumber" title="http://x&y" rel="nofollow">[1]</a> +</p> +!! end + +!! test +External links: [encoded ampersand] +!! input +Old & use: [http://x&y] +!! result +<p>Old & use: <a href="http://x&y" class="external autonumber" title="http://x&y" rel="nofollow">[1]</a> +</p> +!! end + +!! test +External links: [encoded equals] (bug 6102) +!! input +[http://example.com/?foo=bar] +!! result +<p><a href="http://example.com/?foo=bar" class="external autonumber" title="http://example.com/?foo=bar" rel="nofollow">[1]</a> +</p> +!! end + +!! test +External links: www.jpeg.org (bug 554) +!! input +http://www.jpeg.org +!!result +<p><a href="http://www.jpeg.org" class="external free" title="http://www.jpeg.org" rel="nofollow">http://www.jpeg.org</a> +</p> +!! end + +!! test +External links: URL within URL (original bug 2) +!! input +[http://www.unausa.org/newindex.asp?place=http://www.unausa.org/programs/mun.asp] +!! 
result +<p><a href="http://www.unausa.org/newindex.asp?place=http://www.unausa.org/programs/mun.asp" class="external autonumber" title="http://www.unausa.org/newindex.asp?place=http://www.unausa.org/programs/mun.asp" rel="nofollow">[1]</a> +</p> +!! end + +!! test +BUG 361: URL inside bracketed URL +!! input +[http://www.example.com/foo http://www.example.com/bar] +!! result +<p><a href="http://www.example.com/foo" class="external text" title="http://www.example.com/foo" rel="nofollow">http://www.example.com/bar</a> +</p> +!! end + +!! test +BUG 361: URL within URL, not bracketed +!! input +http://www.example.com/foo?=http://www.example.com/bar +!! result +<p><a href="http://www.example.com/foo?=http://www.example.com/bar" class="external free" title="http://www.example.com/foo?=http://www.example.com/bar" rel="nofollow">http://www.example.com/foo?=http://www.example.com/bar</a> +</p> +!! end + +!! test +BUG 289: ">"-token in URL-tail +!! input +http://www.example.com/<hello> +!! result +<p><a href="http://www.example.com/" class="external free" title="http://www.example.com/" rel="nofollow">http://www.example.com/</a><hello> +</p> +!!end + +!! test +BUG 289: literal ">"-token in URL-tail +!! input +http://www.example.com/<b>html</b> +!! result +<p><a href="http://www.example.com/" class="external free" title="http://www.example.com/" rel="nofollow">http://www.example.com/</a><b>html</b> +</p> +!!end + +!! test +BUG 289: ">"-token in bracketed URL +!! input +[http://www.example.com/<hello> stuff] +!! result +<p><a href="http://www.example.com/" class="external text" title="http://www.example.com/" rel="nofollow"><hello> stuff</a> +</p> +!!end + +!! test +BUG 289: literal ">"-token in bracketed URL +!! input +[http://www.example.com/<b>html</b> stuff] +!! result +<p><a href="http://www.example.com/" class="external text" title="http://www.example.com/" rel="nofollow"><b>html</b> stuff</a> +</p> +!!end + +!! test +BUG 289: literal double quote at end of URL +!! 
input +http://www.example.com/"hello" +!! result +<p><a href="http://www.example.com/" class="external free" title="http://www.example.com/" rel="nofollow">http://www.example.com/</a>"hello" +</p> +!!end + +!! test +BUG 289: literal double quote in bracketed URL +!! input +[http://www.example.com/"hello" stuff] +!! result +<p><a href="http://www.example.com/" class="external text" title="http://www.example.com/" rel="nofollow">"hello" stuff</a> +</p> +!!end + +!! test +External links: invalid character +Fixme: the missing char seems to have gone missing +!! options +disabled +!! input +[http://www.example.com test] +!! result +<p>[<a href="http://www.example.com" class="external free" title="http://www.example.com" rel="nofollow">http://www.example.com</a> test] +</p> +!! end + +!! test +External links: multiple legal whitespace is fine, Magnus. Don't break it please. (bug 5081) +!! input +[http://www.example.com test] +!! result +<p><a href="http://www.example.com" class="external text" title="http://www.example.com" rel="nofollow">test</a> +</p> +!! end + +!! test +External links: wiki links within external link (Bug 3695) +!! input +[http://example.com [[wikilink]] embedded in ext link] +!! result +<p><a href="http://example.com" class="external text" title="http://example.com" rel="nofollow"></a><a href="/index.php?title=Wikilink&action=edit" class="new" title="Wikilink">wikilink</a><a href="http://example.com" class="external text" title="http://example.com" rel="nofollow"> embedded in ext link</a> +</p> +!! end + +!! test +BUG 787: Links with one slash after the url protocol are invalid +!! input +http:/example.com + +[http:/example.com title] +!! result +<p>http:/example.com +</p><p>[http:/example.com title] +</p> +!! end + +!! test +Bug 2702: Mismatched <i>, <b> and <a> tags are invalid +!! 
input +''[http://example.com text''] +[http://example.com '''text]''' +''Something [http://example.com in italic''] +''Something [http://example.com mixed''''', even bold]''' +'''''Now [http://example.com both'''''] +!! result +<p><a href="http://example.com" class="external text" title="http://example.com" rel="nofollow"><i>text</i></a> +<a href="http://example.com" class="external text" title="http://example.com" rel="nofollow"><b>text</b></a> +<i>Something </i><a href="http://example.com" class="external text" title="http://example.com" rel="nofollow"><i>in italic</i></a> +<i>Something </i><a href="http://example.com" class="external text" title="http://example.com" rel="nofollow"><i>mixed</i><b>, even bold</b></a> +<i><b>Now </b></i><a href="http://example.com" class="external text" title="http://example.com" rel="nofollow"><i><b>both</b></i></a> +</p> +!! end + + +!! test +Bug 4781: %26 in URL +!! input +http://www.example.com/?title=AT%26T +!! result +<p><a href="http://www.example.com/?title=AT%26T" class="external free" title="http://www.example.com/?title=AT%26T" rel="nofollow">http://www.example.com/?title=AT%26T</a> +</p> +!! end + +!! test +Bug 4781, 5267: %26 in URL +!! input +http://www.example.com/?title=100%25_Bran +!! result +<p><a href="http://www.example.com/?title=100%25_Bran" class="external free" title="http://www.example.com/?title=100%25_Bran" rel="nofollow">http://www.example.com/?title=100%25_Bran</a> +</p> +!! end + +!! test +Bug 4781, 5267: %28, %29 in URL +!! input +http://www.example.com/?title=Ben-Hur_%281959_film%29 +!! result +<p><a href="http://www.example.com/?title=Ben-Hur_%281959_film%29" class="external free" title="http://www.example.com/?title=Ben-Hur_%281959_film%29" rel="nofollow">http://www.example.com/?title=Ben-Hur_%281959_film%29</a> +</p> +!! end + + +!! test +Bug 4781: %26 in autonumber URL +!! input +[http://www.example.com/?title=AT%26T] +!! 
result +<p><a href="http://www.example.com/?title=AT%26T" class="external autonumber" title="http://www.example.com/?title=AT%26T" rel="nofollow">[1]</a> +</p> +!! end + +!! test +Bug 4781, 5267: %26 in autonumber URL +!! input +[http://www.example.com/?title=100%25_Bran] +!! result +<p><a href="http://www.example.com/?title=100%25_Bran" class="external autonumber" title="http://www.example.com/?title=100%25_Bran" rel="nofollow">[1]</a> +</p> +!! end + +!! test +Bug 4781, 5267: %28, %29 in autonumber URL +!! input +[http://www.example.com/?title=Ben-Hur_%281959_film%29] +!! result +<p><a href="http://www.example.com/?title=Ben-Hur_%281959_film%29" class="external autonumber" title="http://www.example.com/?title=Ben-Hur_%281959_film%29" rel="nofollow">[1]</a> +</p> +!! end + + +!! test +Bug 4781: %26 in bracketed URL +!! input +[http://www.example.com/?title=AT%26T link] +!! result +<p><a href="http://www.example.com/?title=AT%26T" class="external text" title="http://www.example.com/?title=AT%26T" rel="nofollow">link</a> +</p> +!! end + +!! test +Bug 4781, 5267: %26 in bracketed URL +!! input +[http://www.example.com/?title=100%25_Bran link] +!! result +<p><a href="http://www.example.com/?title=100%25_Bran" class="external text" title="http://www.example.com/?title=100%25_Bran" rel="nofollow">link</a> +</p> +!! end + +!! test +Bug 4781, 5267: %28, %29 in bracketed URL +!! input +[http://www.example.com/?title=Ben-Hur_%281959_film%29 link] +!! result +<p><a href="http://www.example.com/?title=Ben-Hur_%281959_film%29" class="external text" title="http://www.example.com/?title=Ben-Hur_%281959_film%29" rel="nofollow">link</a> +</p> +!! end + +!! test +External link containing double-single-quotes in text '' (bug 4598 sanity check) +!! input +Some [http://example.com/ pretty ''italics'' and stuff]! +!! result +<p>Some <a href="http://example.com/" class="external text" title="http://example.com/" rel="nofollow">pretty <i>italics</i> and stuff</a>! +</p> +!! end + +!! 
test +External link containing double-single-quotes in text embedded in italics (bug 4598 sanity check) +!! input +''Some [http://example.com/ pretty ''italics'' and stuff]!'' +!! result +<p><i>Some </i><a href="http://example.com/" class="external text" title="http://example.com/" rel="nofollow"><i>pretty </i>italics<i> and stuff</i></a><i>!</i> +</p> +!! end + + + +### +### Quotes +### + +!! test +Quotes +!! input +Normal text. '''Bold text.''' Normal text. ''Italic text.'' + +Normal text. '''''Bold italic text.''''' Normal text. +!!result +<p>Normal text. <b>Bold text.</b> Normal text. <i>Italic text.</i> +</p><p>Normal text. <i><b>Bold italic text.</b></i> Normal text. +</p> +!! end + + +!! test +Unclosed and unmatched quotes +!! input +'''''Bold italic text '''with bold deactivated''' in between.''''' + +'''''Bold italic text ''with italic deactivated'' in between.''''' + +'''Bold text.. + +..spanning two paragraphs (should not work).''' + +'''Bold tag left open + +''Italic tag left open + +Normal text. + +<!-- Unmatching number of opening, closing tags: --> +'''This year''''s election ''should'' beat '''last year''''s. + +''Tom'''s car is bigger than ''Susan'''s. +!! result +<p><i><b>Bold italic text </b>with bold deactivated<b> in between.</b></i> +</p><p><b><i>Bold italic text </i>with italic deactivated<i> in between.</i></b> +</p><p><b>Bold text..</b> +</p><p>..spanning two paragraphs (should not work). +</p><p><b>Bold tag left open</b> +</p><p><i>Italic tag left open</i> +</p><p>Normal text. +</p><p><b>This year'</b>s election <i>should</i> beat <b>last year'</b>s. +</p><p><i>Tom<b>s car is bigger than </b></i><b>Susan</b>s. +</p> +!! 
end + +### +### Tables +### +### some content taken from http://meta.wikimedia.org/wiki/MediaWiki_User%27s_Guide:_Using_tables +### + +# This should not produce <table></table> as <table><tr><td></td></tr></table> +# is the bare minimun required by the spec, see: +# http://www.w3.org/TR/xhtml-modularization/dtd_module_defs.html#a_module_Basic_Tables +!! test +A table with no data. +!! input +{||} +!! result +!! end + +# A table with nothing but a caption is invalid XHTML, we might want to render +# this as <p>caption</p> +!! test +A table with nothing but a caption +!! input +{| +|+ caption +|} +!! result +<table> +<caption> caption +</caption><tr><td></td></tr></table> + +!! end + +!! test +Simple table +!! input +{| +| 1 || 2 +|- +| 3 || 4 +|} +!! result +<table> +<tr> +<td> 1 </td><td> 2 +</td></tr> +<tr> +<td> 3 </td><td> 4 +</td></tr></table> + +!! end + +!! test +Multiplication table +!! input +{| border="1" cellpadding="2" +|+Multiplication table +|- +! × !! 1 !! 2 !! 3 +|- +! 1 +| 1 || 2 || 3 +|- +! 2 +| 2 || 4 || 6 +|- +! 3 +| 3 || 6 || 9 +|- +! 4 +| 4 || 8 || 12 +|- +! 5 +| 5 || 10 || 15 +|} +!! result +<table border="1" cellpadding="2"> +<caption>Multiplication table +</caption> +<tr> +<th> × </th><th> 1 </th><th> 2 </th><th> 3 +</th></tr> +<tr> +<th> 1 +</th><td> 1 </td><td> 2 </td><td> 3 +</td></tr> +<tr> +<th> 2 +</th><td> 2 </td><td> 4 </td><td> 6 +</td></tr> +<tr> +<th> 3 +</th><td> 3 </td><td> 6 </td><td> 9 +</td></tr> +<tr> +<th> 4 +</th><td> 4 </td><td> 8 </td><td> 12 +</td></tr> +<tr> +<th> 5 +</th><td> 5 </td><td> 10 </td><td> 15 +</td></tr></table> + +!! end + +!! test +Table rowspan +!! input +{| align=right border=1 +| Cell 1, row 1 +|rowspan=2| Cell 2, row 1 (and 2) +| Cell 3, row 1 +|- +| Cell 1, row 2 +| Cell 3, row 2 +|} +!! 
result +<table align="right" border="1"> +<tr> +<td> Cell 1, row 1 +</td><td rowspan="2"> Cell 2, row 1 (and 2) +</td><td> Cell 3, row 1 +</td></tr> +<tr> +<td> Cell 1, row 2 +</td><td> Cell 3, row 2 +</td></tr></table> + +!! end + +!! test +Nested table +!! input +{| border=1 +| α +| +{| bgcolor=#ABCDEF border=2 +|nested +|- +|table +|} +|the original table again +|} +!! result +<table border="1"> +<tr> +<td> α +</td><td> +<table bgcolor="#ABCDEF" border="2"> +<tr> +<td>nested +</td></tr> +<tr> +<td>table +</td></tr></table> +</td><td>the original table again +</td></tr></table> + +!! end + +!! test +Invalid attributes in table cell (bug 1830) +!! input +{| +|Cell:|broken +|} +!! result +<table> +<tr> +<td>broken +</td></tr></table> + +!! end + + +# FIXME: this one has incorrect tag nesting still. +!! test +Table security: embedded pipes (http://mail.wikipedia.org/pipermail/wikitech-l/2006-April/034637.html) +!! input +{| +| |[ftp://|x||]" onmouseover="alert(document.cookie)">test +!! result +<table> +<tr> +<td><a href="ftp://|x||" class="external autonumber" title="ftp://|x||" rel="nofollow">[1]</td><td></a>" onmouseover="alert(document.cookie)">test +</td> +</tr> +</table> + +!! end + + +### +### Internal links +### +!! test +Plain link, capitalized +!! input +[[Main Page]] +!! result +<p><a href="/wiki/Main_Page" title="Main Page">Main Page</a> +</p> +!! end + +!! test +Plain link, uncapitalized +!! input +[[main Page]] +!! result +<p><a href="/wiki/Main_Page" title="Main Page">main Page</a> +</p> +!! end + +!! test +Piped link +!! input +[[Main Page|The Main Page]] +!! result +<p><a href="/wiki/Main_Page" title="Main Page">The Main Page</a> +</p> +!! end + +!! test +Broken link +!! input +[[Zigzagzogzagzig]] +!! result +<p><a href="/index.php?title=Zigzagzogzagzig&action=edit" class="new" title="Zigzagzogzagzig">Zigzagzogzagzig</a> +</p> +!! end + +!! test +Link with prefix +!! 
input +xxx[[main Page]], xxx[[Main Page]], Xxx[[main Page]] XXX[[main Page]], XXX[[Main Page]] +!! result +<p>xxx<a href="/wiki/Main_Page" title="Main Page">main Page</a>, xxx<a href="/wiki/Main_Page" title="Main Page">Main Page</a>, Xxx<a href="/wiki/Main_Page" title="Main Page">main Page</a> XXX<a href="/wiki/Main_Page" title="Main Page">main Page</a>, XXX<a href="/wiki/Main_Page" title="Main Page">Main Page</a> +</p> +!! end + +!! test +Link with suffix +!! input +[[Main Page]]xxx, [[Main Page]]XXX +!! result +<p><a href="/wiki/Main_Page" title="Main Page">Main Pagexxx</a>, <a href="/wiki/Main_Page" title="Main Page">Main Page</a>XXX +</p> +!! end + +!! test +Link with 3 brackets +!! input +[[[main page]]] +!! result +<p>[[[main page]]] +</p> +!! end + +!! test +Piped link with 3 brackets +!! input +[[[main page|the main page]]] +!! result +<p>[[[main page|the main page]]] +</p> +!! end + +!! test +Link with multiple pipes +!! input +[[Main Page|The|Main|Page]] +!! result +<p><a href="/wiki/Main_Page" title="Main Page">The|Main|Page</a> +</p> +!! end + +!! test +Link to namespaces +!! input +[[Talk:Parser testing]], [[Meta:Disclaimers]] +!! result +<p><a href="/index.php?title=Talk:Parser_testing&action=edit" class="new" title="Talk:Parser testing">Talk:Parser testing</a>, <a href="/index.php?title=Meta:Disclaimers&action=edit" class="new" title="Meta:Disclaimers">Meta:Disclaimers</a> +</p> +!! end + +!! test +Piped link to namespace +!! input +[[Meta:Disclaimers|The disclaimers]] +!! result +<p><a href="/index.php?title=Meta:Disclaimers&action=edit" class="new" title="Meta:Disclaimers">The disclaimers</a> +</p> +!! end + +!! test +Link containing } +!! input +[[Usually caused by a typo (oops}]] +!! result +<p>[[Usually caused by a typo (oops}]] +</p> +!! end + +!! test +Link containing % (not as a hex sequence) +!! input +[[7% Solution]] +!! result +<p><a href="/index.php?title=7%25_Solution&action=edit" class="new" title="7% Solution">7% Solution</a> +</p> +!! 
end + +!! test +Link containing % as a single hex sequence interpreted to char +!! input +[[7%25 Solution]] +!! result +<p><a href="/index.php?title=7%25_Solution&action=edit" class="new" title="7% Solution">7% Solution</a> +</p> +!!end + +!! test +Link containing % as a double hex sequence interpreted to hex sequence +!! input +[[7%2525 Solution]] +!! result +<p>[[7%2525 Solution]] +</p> +!!end + +!! test +Link containing "#<" and "#>" % as a hex sequences- these are valid section anchors +Example for such a section: == < == +!! input +[[%23%3c]][[%23%3e]] +!! result +<p><a href="#.3C" title="">#<</a><a href="#.3E" title="">#></a> +</p> +!! end + +!! test +Link containing "<#" and ">#" as a hex sequences +!! input +[[%3c%23]][[%3e%23]] +!! result +<p>[[%3c%23]][[%3e%23]] +</p> +!! end + +!! test +Link containing double-single-quotes '' (bug 4598) +!! input +[[Lista d''e paise d''o munno]] +!! result +<p><a href="/index.php?title=Lista_d%27%27e_paise_d%27%27o_munno&action=edit" class="new" title="Lista d''e paise d''o munno">Lista d''e paise d''o munno</a> +</p> +!! end + +!! test +Link containing double-single-quotes '' in text (bug 4598 sanity check) +!! input +Some [[Link|pretty ''italics'' and stuff]]! +!! result +<p>Some <a href="/index.php?title=Link&action=edit" class="new" title="Link">pretty <i>italics</i> and stuff</a>! +</p> +!! end + +!! test +Link containing double-single-quotes '' in text embedded in italics (bug 4598 sanity check) +!! input +''Some [[Link|pretty ''italics'' and stuff]]! +!! result +<p><i>Some </i><a href="/index.php?title=Link&action=edit" class="new" title="Link"><i>pretty </i>italics<i> and stuff</i></a><i>!</i> +</p> +!! end + +!! test +Plain link to URL +!! input +[[http://www.example.org]] +!! result +<p>[<a href="http://www.example.org" class="external autonumber" title="http://www.example.org" rel="nofollow">[1]</a>] +</p> +!! end + +# I'm fairly sure the expected result here is wrong. 
+# We want these to be URL links, not pseudo-pages with URLs for titles.... +# However the current output is also pretty screwy. +# +# ---- +# I'm changing it to match the current output--it arguably makes more +# sense in the light of the test above. Old expected result was: +#<p>Piped link to URL: <a href="/index.php?title=Http://www.example.org&action=edit" class="new" title="Http://www.example.org">an example URL</a> +#</p> +# But I think this test is bordering on "garbage in, garbage out" anyway. +# -- wtm +!! test +Piped link to URL +!! input +Piped link to URL: [[http://www.example.org|an example URL]] +!! result +<p>Piped link to URL: [<a href="http://www.example.org|an" class="external text" title="http://www.example.org|an" rel="nofollow">example URL</a>] +</p> +!! end + +!! test +BUG 2: [[page|http://url/]] should link to page, not http://url/ +!! input +[[Main Page|http://url/]] +!! result +<p><a href="/wiki/Main_Page" title="Main Page">http://url/</a> +</p> +!! end + +!! test +BUG 337: Escaped self-links should be bold +!! options +title=[[Bug462]] +!! input +[[Bug462]] [[Bug462]] +!! result +<p><strong class="selflink">Bug462</strong> <strong class="selflink">Bug462</strong> +</p> +!! end + +!! test +Self-link to section should not be bold +!! options +title=[[Main Page]] +!! input +[[Main Page#section]] +!! result +<p><a href="/wiki/Main_Page#section" title="Main Page">Main Page#section</a> +</p> +!! end + +!! test +<nowiki> inside a link +!! input +[[Main<nowiki> Page</nowiki>]] [[Main Page|the main page <nowiki>[it's not very good]</nowiki>]] +!! result +<p>[[Main Page]] <a href="/wiki/Main_Page" title="Main Page">the main page [it's not very good]</a> +</p> +!! end + +### +### Interwiki links (see maintenance/interwiki.sql) +### + +!! test +Inline interwiki link +!! input +[[MeatBall:SoftSecurity]] +!! 
result +<p><a href="http://www.usemod.com/cgi-bin/mb.pl?SoftSecurity" class="extiw" title="meatball:SoftSecurity">MeatBall:SoftSecurity</a> +</p> +!! end + +!! test +Inline interwiki link with empty title (bug 2372) +!! input +[[MeatBall:]] +!! result +<p><a href="http://www.usemod.com/cgi-bin/mb.pl?" class="extiw" title="meatball:">MeatBall:</a> +</p> +!! end + +!! test +Interwiki link encoding conversion (bug 1636) +!! input +*[[Wikipedia:ro:Olteniţa]] +*[[Wikipedia:ro:Olteniţa]] +!! result +<ul><li><a href="http://en.wikipedia.org/wiki/ro:Olteni%C5%A3a" class="extiw" title="wikipedia:ro:Olteniţa">Wikipedia:ro:Olteniţa</a> +</li><li><a href="http://en.wikipedia.org/wiki/ro:Olteni%C5%A3a" class="extiw" title="wikipedia:ro:Olteniţa">Wikipedia:ro:Olteniţa</a> +</li></ul> + +!! end + +!! test +Interwiki link with fragment (bug 2130) +!! input +[[MeatBall:SoftSecurity#foo]] +!! result +<p><a href="http://www.usemod.com/cgi-bin/mb.pl?SoftSecurity#foo" class="extiw" title="meatball:SoftSecurity">MeatBall:SoftSecurity#foo</a> +</p> +!! end + +## +## XHTML tidiness +### + +!! test +<br> to <br /> +!! input +1<br>2<br />3 +!! result +<p>1<br />2<br />3 +</p> +!! end + +!! test +Incorrecly removing closing slashes from correctly formed XHTML +!! input +<br style="clear:both;" /> +!! result +<p><br style="clear:both;" /> +</p> +!! end + +!! test +Failing to transform badly formed HTML into correct XHTML +!! input +<br clear=left> +<br clear=right> +<br clear=all> +!! result +<p><br clear="left" /> +<br clear="right" /> +<br clear="all" /> +</p> +!!end + +!! test +Horizontal ruler (should it add that extra space?) +!! input +<hr> +<hr > +foo <hr +> bar +!! result +<hr /> +<hr /> +foo <hr /> bar + +!! end + +### +### Block-level elements +### +!! test +Common list +!! input +*Common list +* item 2 +*item 3 +!! result +<ul><li>Common list +</li><li> item 2 +</li><li>item 3 +</li></ul> + +!! end + +!! test +Numbered list +!! input +#Numbered list +#item 2 +# item 3 +!! 
result +<ol><li>Numbered list +</li><li>item 2 +</li><li> item 3 +</li></ol> + +!! end + +!! test +Mixed list +!! input +*Mixed list +*# with numbers +** and bullets +*# and numbers +*bullets again +**bullet level 2 +***bullet level 3 +***#Number on level 4 +**bullet level 2 +**#Number on level 3 +**#Number on level 3 +*#number level 2 +*Level 1 +!! result +<ul><li>Mixed list +<ol><li> with numbers +</li></ol> +<ul><li> and bullets +</li></ul> +<ol><li> and numbers +</li></ol> +</li><li>bullets again +<ul><li>bullet level 2 +<ul><li>bullet level 3 +<ol><li>Number on level 4 +</li></ol> +</li></ul> +</li><li>bullet level 2 +<ol><li>Number on level 3 +</li><li>Number on level 3 +</li></ol> +</li></ul> +<ol><li>number level 2 +</li></ol> +</li><li>Level 1 +</li></ul> + +!! end + +!! test +List items are not parsed correctly following a <pre> block (bug 785) +!! input +* <pre>foo</pre> +* <pre>bar</pre> +* zar +!! result +<ul><li> <pre>foo</pre> +</li><li> <pre>bar</pre> +</li><li> zar +</li></ul> + +!! end + +### +### Magic Words +### + +!! test +Magic Word: {{CURRENTDAY}} +!! input +{{CURRENTDAY}} +!! result +<p>1 +</p> +!! end + +!! test +Magic Word: {{CURRENTDAY2}} +!! input +{{CURRENTDAY2}} +!! result +<p>01 +</p> +!! end + +!! test +Magic Word: {{CURRENTDAYNAME}} +!! input +{{CURRENTDAYNAME}} +!! result +<p>Thursday +</p> +!! end + +!! test +Magic Word: {{CURRENTDOW}} +!! input +{{CURRENTDOW}} +!! result +<p>4 +</p> +!! end + +!! test +Magic Word: {{CURRENTMONTH}} +!! input +{{CURRENTMONTH}} +!! result +<p>01 +</p> +!! end + +!! test +Magic Word: {{CURRENTMONTHABBREV}} +!! input +{{CURRENTMONTHABBREV}} +!! result +<p>Jan +</p> +!! end + +!! test +Magic Word: {{CURRENTMONTHNAME}} +!! input +{{CURRENTMONTHNAME}} +!! result +<p>January +</p> +!! end + +!! test +Magic Word: {{CURRENTMONTHNAMEGEN}} +!! input +{{CURRENTMONTHNAMEGEN}} +!! result +<p>January +</p> +!! end + +!! test +Magic Word: {{CURRENTTIME}} +!! input +{{CURRENTTIME}} +!! result +<p>00:02 +</p> +!! 
end + +!! test +Magic Word: {{CURRENTWEEK}} (@bug 4594) +!! input +{{CURRENTWEEK}} +!! result +<p>1 +</p> +!! end + +!! test +Magic Word: {{CURRENTYEAR}} +!! input +{{CURRENTYEAR}} +!! result +<p>1970 +</p> +!! end + +!! test +Magic Word: {{FULLPAGENAME}} +!! options +title=[[User:Ævar Arnfjörð Bjarmason]] +!! input +{{FULLPAGENAME}} +!! result +<p>User:Ævar Arnfjörð Bjarmason +</p> +!! end + +!! test +Magic Word: {{FULLPAGENAMEE}} +!! options +title=[[User:Ævar Arnfjörð Bjarmason]] +!! input +{{FULLPAGENAMEE}} +!! result +<p>User:%C3%86var_Arnfj%C3%B6r%C3%B0_Bjarmason +</p> +!! end + +!! test +Magic Word: {{NAMESPACE}} +!! options +title=[[User:Ævar Arnfjörð Bjarmason]] +disabled # FIXME +!! input +{{NAMESPACE}} +!! result +<p>User +</p> +!! end + +!! test +Magic Word: {{NAMESPACEE}} +!! options +title=[[User:Ævar Arnfjörð Bjarmason]] +disabled # FIXME +!! input +{{NAMESPACEE}} +!! result +<p>User +</p> +!! end + +!! test +Magic Word: {{NUMBEROFARTICLES}} +!! input +{{NUMBEROFARTICLES}} +!! result +<p>1 +</p> +!! end + +!! test +Magic Word: {{NUMBEROFFILES}} +!! input +{{NUMBEROFFILES}} +!! result +<p>1 +</p> +!! end + +!! test +Magic Word: {{PAGENAME}} +!! options +title=[[User:Ævar Arnfjörð Bjarmason]] +disabled # FIXME +!! input +{{PAGENAME}} +!! result +<p>Ævar Arnfjörð Bjarmason +</p> +!! end + +!! test +Magic Word: {{PAGENAMEE}} +!! options +title=[[User:Ævar Arnfjörð Bjarmason]] +!! input +{{PAGENAMEE}} +!! result +<p>User:%C3%86var_Arnfj%C3%B6r%C3%B0_Bjarmason +</p> +!! end + +!! test +Magic Word: {{REVISIONID}} +!! input +{{REVISIONID}} +!! result +<p>1337 +</p> +!! end + +!! test +Magic Word: {{SCRIPTPATH}} +!! input +{{SCRIPTPATH}} +!! result +<p>/ +</p> +!! end + +!! test +Magic Word: {{SERVER}} +!! input +{{SERVER}} +!! result +<p><a href="http://localhost" class="external free" title="http://localhost" rel="nofollow">http://localhost</a> +</p> +!! end + +!! test +Magic Word: {{SERVERNAME}} +!! input +{{SERVERNAME}} +!! 
result +<p>Britney Spears +</p> +!! end + +!! test +Magic Word: {{SITENAME}} +!! input +{{SITENAME}} +!! result +<p>MediaWiki +</p> +!! end + +!! test +Namespace 1 {{ns:1}} +!! input +{{ns:1}} +!! result +<p>Talk +</p> +!! end + +!! test +Namespace 1 {{ns:01}} +!! input +{{ns:01}} +!! result +<p>Talk +</p> +!! end + +!! test +Namespace 0 {{ns:0}} (bug 4783) +!! input +{{ns:0}} +!! result + +!! end + +!! test +Namespace 0 {{ns:00}} (bug 4783) +!! input +{{ns:00}} +!! result + +!! end + +!! test +Namespace -1 {{ns:-1}} +!! input +{{ns:-1}} +!! result +<p>Special +</p> +!! end + +!! test +Namespace Project {{ns:User}} +!! input +{{ns:User}} +!! result +<p>User +</p> +!! end + + +### +### Magic links +### +!! test +Magic links: internal link to RFC (bug 479) +!! input +[[RFC 123]] +!! result +<p><a href="/index.php?title=RFC_123&action=edit" class="new" title="RFC 123">RFC 123</a> +</p> +!! end + +!! test +Magic links: RFC (bug 479) +!! input +RFC 822 +!! result +<p><a href="http://www.ietf.org/rfc/rfc822.txt" class="external" title="http://www.ietf.org/rfc/rfc822.txt">RFC 822</a> +</p> +!! end + +!! test +Magic links: ISBN (bug 1937) +!! input +ISBN 0-306-40615-2 +!! result +<p><a href="/index.php?title=Special:Booksources&isbn=0306406152" class="internal">ISBN 0-306-40615-2</a> +</p> +!! end + +!! test +Magic links: PMID incorrectly converts space to underscore +!! input +PMID 1234 +!! result +<p><a href="http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=pubmed&dopt=Abstract&list_uids=1234" class="external" title="http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=pubmed&dopt=Abstract&list_uids=1234">PMID 1234</a> +</p> +!! end + +### +### Templates +#### + +!! test +Nonexistant template +!! input +{{thistemplatedoesnotexist}} +!! result +<p><a href="/index.php?title=Template:Thistemplatedoesnotexist&action=edit" class="new" title="Template:Thistemplatedoesnotexist">Template:Thistemplatedoesnotexist</a> +</p> +!! end + +!! 
article +Template:test +!! text +This is a test template +!! endarticle + +!! test +Simple template +!! input +{{test}} +!! result +<p>This is a test template +</p> +!! end + +!! test +Template with explicit namespace +!! input +{{Template:test}} +!! result +<p>This is a test template +</p> +!! end + + +!! article +Template:paramtest +!! text +This is a test template with parameter {{{param}}} +!! endarticle + +!! test +Template parameter +!! input +{{paramtest|param=foo}} +!! result +<p>This is a test template with parameter foo +</p> +!! end + +!! article +Template:paramtestnum +!! text +[[{{{1}}}|{{{2}}}]] +!! endarticle + +!! test +Template unnamed parameter +!! input +{{paramtestnum|Main Page|the main page}} +!! result +<p><a href="/wiki/Main_Page" title="Main Page">the main page</a> +</p> +!! end + +!! article +Template:templatesimple +!! text +(test) +!! endarticle + +!! article +Template:templateredirect +!! text +#redirect [[Template:templatesimple]] +!! endarticle + +!! article +Template:templateasargtestnum +!! text +{{{{{1}}}}} +!! endarticle + +!! article +Template:templateasargtest +!! text +{{template{{{templ}}}}} +!! endarticle + +!! article +Template:templateasargtest2 +!! text +{{{{{templ}}}}} +!! endarticle + +!! test +Template with template name as unnamed argument +!! input +{{templateasargtestnum|templatesimple}} +!! result +<p>(test) +</p> +!! end + +!! test +Template with template name as argument +!! input +{{templateasargtest|templ=simple}} +!! result +<p>(test) +</p> +!! end + +!! test +Template with template name as argument (2) +!! input +{{templateasargtest2|templ=templatesimple}} +!! result +<p>(test) +</p> +!! end + +!! article +Template:templateasargtestdefault +!! text +{{{{{templ|templatesimple}}}}} +!! endarticle + +!! article +Template:templa +!! text +'''templ''' +!! endarticle + +!! test +Template with default value +!! input +{{templateasargtestdefault}} +!! result +<p>(test) +</p> +!! end + +!! 
test +Template with default value (value set) +!! input +{{templateasargtestdefault|templ=templa}} +!! result +<p><b>templ</b> +</p> +!! end + +!! test +Template redirect +!! input +{{templateredirect}} +!! result +<p>(test) +</p> +!! end + +!! test +Template with argument in separate line +!! input +{{ templateasargtest | + templ = simple }} +!! result +<p>(test) +</p> +!! end + +!! test +Template with complex template as argument +!! input +{{paramtest| + param ={{ templateasargtest | + templ = simple }}}} +!! result +<p>This is a test template with parameter (test) +</p> +!! end + +!! test +Template with thumb image (wiht link in description) +!! input +{{paramtest| + param =[[Image:noimage.png|thumb|[[no link|link]] [[no link|caption]]]]}} +!! result +This is a test template with parameter <div class="thumb tright"><div style="width:182px;"><a href="/index.php?title=Special:Upload&wpDestFile=Noimage.png" class="new" title="Image:Noimage.png">Image:Noimage.png</a> <div class="thumbcaption"><a href="/index.php?title=No_link&action=edit" class="new" title="No link">link</a> <a href="/index.php?title=No_link&action=edit" class="new" title="No link">caption</a></div></div></div> + +!! end + +!! article +Template:complextemplate +!! text +{{{1}}} {{paramtest| + param ={{{param}}}}} +!! endarticle + +!! test +Template with complex arguments +!! input +{{complextemplate| + param ={{ templateasargtest | + templ = simple }}|[[Template:complextemplate|link]]}} +!! result +<p><a href="/wiki/Template:Complextemplate" title="Template:Complextemplate">link</a> This is a test template with parameter (test) +</p> +!! end + +!! test +BUG 553: link with two variables in a piped link +!! input +{| +|[[{{{1}}}|{{{2}}}]] +|} +!! result +<table> +<tr> +<td>[[{{{1}}}|{{{2}}}]] +</td></tr></table> + +!! end + +!! test +Magic variable as template parameter +!! input +{{paramtest|param={{SITENAME}}}} +!! result +<p>This is a test template with parameter MediaWiki +</p> +!! end + +!! 
article +Template:linktest +!! text +[[{{{param}}}|link]] +!! endarticle + +!! test +Template parameter as link source +!! input +{{linktest|param=Main Page}} +!! result +<p><a href="/wiki/Main_Page" title="Main Page">link</a> +</p> +!! end + + +!!article +Template:paramtest2 +!! text +including another template, {{paramtest|param={{{arg}}}}} +!! endarticle + +!! test +Template passing argument to another template +!! input +{{paramtest2|arg='hmm'}} +!! result +<p>including another template, This is a test template with parameter 'hmm' +</p> +!! end + +!! article +Template:Linktest2 +!! text +Main Page +!! endarticle + +!! test +Template as link source +!! input +[[{{linktest2}}]] +!! result +<p><a href="/wiki/Main_Page" title="Main Page">Main Page</a> +</p> +!! end + + +!! article +Template:loop1 +!! text +{{loop2}} +!! endarticle + +!! article +Template:loop2 +!! text +{{loop1}} +!! endarticle + +!! test +Template infinite loop +!! input +{{loop1}} +!! result +<p>{{loop1}}<!-- WARNING: template loop detected --> +</p> +!! end + +!! test +Template from main namespace +!! input +{{:Main Page}} +!! result +<p>blah blah +</p> +!! end + +!! article +Template:table +!! text +{| +| 1 || 2 +|- +| 3 || 4 +|} +!! endarticle + +!! test +BUG 529: Template with table, not included at beginning of line +!! input +foo {{table}} +!! result +<p>foo +</p> +<table> +<tr> +<td> 1 </td><td> 2 +</td></tr> +<tr> +<td> 3 </td><td> 4 +</td></tr></table> + +!! end + +!! test +BUG 523: Template shouldn't eat newline (or add an extra one before table) +!! input +foo +{{table}} +!! result +<p>foo +</p> +<table> +<tr> +<td> 1 </td><td> 2 +</td></tr> +<tr> +<td> 3 </td><td> 4 +</td></tr></table> + +!! end + +!! test +BUG 41: Template parameters shown as broken links +!! input +{{{parameter}}} +!! result +<p>{{{parameter}}} +</p> +!! end + + +!! article +Template:MSGNW test +!! text +''None'' of '''this''' should be +* interepreted + but rather passed unmodified +{{test}} +!! 
endarticle + +# hmm, fix this or just deprecate msgnw and document its behavior? +!! test +msgnw keyword +!! options +disabled +!! input +{{msgnw:MSGNW test}} +!! result +<p>''None'' of '''this''' should be +* interepreted + but rather passed unmodified +{{test}} +</p> +!! end + +!! test +int keyword +!! input +{{int:youhavenewmessages|lots of money|not!}} +!! result +<p>You have lots of money (not!). +</p> +!! end + +!! article +Template:Includes +!! text +Foo<noinclude>zar</noinclude><includeonly>bar</includeonly> +!! endarticle + +!! test +<includeonly> and <noinclude> being included +!! input +{{Includes}} +!! result +<p>Foobar +</p> +!! end + +!! article +Template:Includes2 +!! text +<onlyinclude>Foo</onlyinclude>bar +!! endarticle + +!! test +<onlyinclude> being included +!! input +{{Includes2}} +!! result +<p>Foo +</p> +!! end + + +!! article +Template:Includes3 +!! text +<onlyinclude>Foo</onlyinclude>bar<includeonly>zar</includeonly> +!! endarticle + +!! test +<onlyinclude> and <includeonly> being included +!! input +{{Includes3}} +!! result +<p>Foo +</p> +!! end + +!! test +<includeonly> and <noinclude> on a page +!! input +Foo<noinclude>zar</noinclude><includeonly>bar</includeonly> +!! result +<p>Foozar +</p> +!! end + +!! test +<onlyinclude> on a page +!! input +<onlyinclude>Foo</onlyinclude>bar +!! result +<p>Foobar +</p> +!! end + +### +### Pre-save transform tests +### +!! test +pre-save transform: subst: +!! options +PST +!! input +{{subst:test}} +!! result +This is a test template +!! end + +!! test +pre-save transform: normal template +!! options +PST +!! input +{{test}} +!! result +{{test}} +!! end + +!! test +pre-save transform: nonexistant template +!! options +PST +!! input +{{thistemplatedoesnotexist}} +!! result +{{thistemplatedoesnotexist}} +!! end + + +!! test +pre-save transform: subst magic variables +!! options +PST +!! input +{{subst:SITENAME}} +!! result +MediaWiki +!! end + +# This is bug 89, which I fixed. -- wtm +!! 
test +pre-save transform: subst: templates with parameters +!! options +pst +!! input +{{subst:paramtest|param="something else"}} +!! result +This is a test template with parameter "something else" +!! end + +!! article +Template:nowikitest +!! text +<nowiki>'''not wiki'''</nowiki> +!! endarticle + +!! test +pre-save transform: nowiki in subst (bug 1188) +!! options +pst +!! input +{{subst:nowikitest}} +!! result +<nowiki>'''not wiki'''</nowiki> +!! end + + +!! article +Template:commenttest +!! text +This template has <!-- a comment --> in it. +!! endarticle + +!! test +pre-save transform: comment in subst (bug 1936) +!! options +pst +!! input +{{subst:commenttest}} +!! result +This template has <!-- a comment --> in it. +!! end + +!! test +pre-save transform: unclosed tag +!! options +pst noxml +!! input +<nowiki>'''not wiki''' +!! result +<nowiki>'''not wiki''' +!! end + +!! test +pre-save transform: mixed tag case +!! options +pst noxml +!! input +<NOwiki>'''not wiki'''</noWIKI> +!! result +<NOwiki>'''not wiki'''</noWIKI> +!! end + +!! test +pre-save transform: unclosed comment in <nowiki> +!! options +pst noxml +!! input +wiki<nowiki>nowiki<!--nowiki</nowiki>wiki +!! result +wiki<nowiki>nowiki<!--nowiki</nowiki>wiki +!!end + +!! article +Template:dangerous +!!text +<span onmouseover="alert('crap')">Oh no</span> +!!endarticle + +!!test +(confirming safety of fix for subst bug 1936) +!! input +{{Template:dangerous}} +!! result +<p><span>Oh no</span> +</p> +!! end + +!! test +pre-save transform: comment containing gallery (bug 5024) +!! options +pst +!! input +<!-- <gallery>data</gallery> --> +!!result +<!-- <gallery>data</gallery> --> +!!end + +!! test +pre-save transform: comment containing extension +!! options +pst +!! input +<!-- <tag>data</tag> --> +!!result +<!-- <tag>data</tag> --> +!!end + +!! test +pre-save transform: comment containing nowiki +!! options +pst +!! 
input +<!-- <nowiki>data</nowiki> --> +!!result +<!-- <nowiki>data</nowiki> --> +!!end + +!! test +pre-save transform: comment containing math +!! options +pst +!! input +<!-- <math>data</math> --> +!!result +<!-- <math>data</math> --> +!!end + +!! test +pre-save transform: <noinclude> in subst (bug 3298) +!! options +pst +!! input +{{subst:Includes}} +!! result +Foobar +!! end + +!! test +pre-save transform: <onlyinclude> in subst (bug 3298) +!! options +pst +!! input +{{subst:Includes2}} +!! result +Foo +!! end + + +### +### Message transform tests +### +!! test +message transform: magic variables +!! options +msg +!! input +{{SITENAME}} +!! result +MediaWiki +!! end + +!! test +message transform: should not transform wiki markup +!! options +msg +!! input +''test'' +!! result +''test'' +!! end + +!! test +message transform: <noinclude> in transcluded template (bug 4926) +!! options +msg +!! input +{{Includes}} +!! result +Foobar +!! end + +!! test +message transform: <onlyinclude> in transcluded template (bug 4926) +!! options +msg +!! input +{{Includes2}} +!! result +Foo +!! end + + +### +### Images +### +!! test +Simple image +!! input +[[Image:foobar.jpg]] +!! result +<p><a href="/wiki/Image:Foobar.jpg" class="image" title="Image:foobar.jpg"><img src="http://example.com/images/3/3a/Foobar.jpg" alt="Image:foobar.jpg" width="1941" height="220" longdesc="/wiki/Image:Foobar.jpg" /></a> +</p> +!! end + +!! test +Right-aligned image +!! input +[[Image:foobar.jpg|right]] +!! result +<div class="floatright"><span><a href="/wiki/Image:Foobar.jpg" class="image" title=""><img src="http://example.com/images/3/3a/Foobar.jpg" alt="" width="1941" height="220" longdesc="/wiki/Image:Foobar.jpg" /></a></span></div> + +!! end + +!! test +Image with caption +!! input +[[Image:foobar.jpg|right|Caption text]] +!! 
result +<div class="floatright"><span><a href="/wiki/Image:Foobar.jpg" class="image" title="Caption text"><img src="http://example.com/images/3/3a/Foobar.jpg" alt="Caption text" width="1941" height="220" longdesc="/wiki/Image:Foobar.jpg" /></a></span></div> + +!! end + +!! test +Image with frame and link +!! input +[[Image:Foobar.jpg|frame|left|This is a test image [[Main Page]]]] +!! result +<div class="thumb tleft"><div style="width:1943px;"><a href="/wiki/Image:Foobar.jpg" class="internal" title="This is a test image Main Page"><img src="http://example.com/images/3/3a/Foobar.jpg" alt="This is a test image Main Page" width="1941" height="220" longdesc="/wiki/Image:Foobar.jpg" /></a> <div class="thumbcaption">This is a test image <a href="/wiki/Main_Page" title="Main Page">Main Page</a></div></div></div> + +!! end + +!! test +Link to image page- image page normally doesn't exists, hence edit link +TODO: Add test with existing image page +#<p><a href="/wiki/Image:Test" title="Image:Test">Image:test</a> +!! input +[[:Image:test]] +!! result +<p><a href="/index.php?title=Image:Test&action=edit" class="new" title="Image:Test">Image:test</a> +</p> +!! end + +!! test +Frameless image caption with a free URL +!! input +[[Image:foobar.jpg|http://example.com]] +!! result +<p><a href="/wiki/Image:Foobar.jpg" class="image" title="http://example.com"><img src="http://example.com/images/3/3a/Foobar.jpg" alt="http://example.com" width="1941" height="220" longdesc="/wiki/Image:Foobar.jpg" /></a> +</p> +!! end + +!! test +Thumbnail image caption with a free URL +!! input +[[Image:foobar.jpg|thumb|http://example.com]] +!! 
result +<div class="thumb tright"><div style="width:182px;"><a href="/wiki/Image:Foobar.jpg" class="internal" title="http://example.com"><img src="http://example.com/images/thumb/3/3a/Foobar.jpg/180px-Foobar.jpg" alt="http://example.com" width="180" height="20" longdesc="/wiki/Image:Foobar.jpg" /></a> <div class="thumbcaption"><div class="magnify" style="float:right"><a href="/wiki/Image:Foobar.jpg" class="internal" title="Enlarge"><img src="/skins/common/images/magnify-clip.png" width="15" height="11" alt="Enlarge" /></a></div><a href="http://example.com" class="external free" title="http://example.com" rel="nofollow">http://example.com</a></div></div></div> + +!! end + +!! test +BUG 1887: A ISBN with a thumbnail +!! input +[[Image:foobar.jpg|thumb|ISBN 12354]] +!! result +<div class="thumb tright"><div style="width:182px;"><a href="/wiki/Image:Foobar.jpg" class="internal" title="ISBN 12354"><img src="http://example.com/images/thumb/3/3a/Foobar.jpg/180px-Foobar.jpg" alt="ISBN 12354" width="180" height="20" longdesc="/wiki/Image:Foobar.jpg" /></a> <div class="thumbcaption"><div class="magnify" style="float:right"><a href="/wiki/Image:Foobar.jpg" class="internal" title="Enlarge"><img src="/skins/common/images/magnify-clip.png" width="15" height="11" alt="Enlarge" /></a></div><a href="/index.php?title=Special:Booksources&isbn=12354" class="internal">ISBN 12354</a></div></div></div> + +!! end + +!! test +BUG 1887: A RFC with a thumbnail +!! input +[[Image:foobar.jpg|thumb|This is RFC 12354]] +!! 
result +<div class="thumb tright"><div style="width:182px;"><a href="/wiki/Image:Foobar.jpg" class="internal" title="This is RFC 12354"><img src="http://example.com/images/thumb/3/3a/Foobar.jpg/180px-Foobar.jpg" alt="This is RFC 12354" width="180" height="20" longdesc="/wiki/Image:Foobar.jpg" /></a> <div class="thumbcaption"><div class="magnify" style="float:right"><a href="/wiki/Image:Foobar.jpg" class="internal" title="Enlarge"><img src="/skins/common/images/magnify-clip.png" width="15" height="11" alt="Enlarge" /></a></div>This is <a href="http://www.ietf.org/rfc/rfc12354.txt" class="external" title="http://www.ietf.org/rfc/rfc12354.txt">RFC 12354</a></div></div></div> + +!! end + +!! test +BUG 1887: A mailto link with a thumbnail +!! input +[[Image:foobar.jpg|thumb|Please mailto:nobody@example.com]] +!! result +<div class="thumb tright"><div style="width:182px;"><a href="/wiki/Image:Foobar.jpg" class="internal" title="Please mailto:nobody@example.com"><img src="http://example.com/images/thumb/3/3a/Foobar.jpg/180px-Foobar.jpg" alt="Please mailto:nobody@example.com" width="180" height="20" longdesc="/wiki/Image:Foobar.jpg" /></a> <div class="thumbcaption"><div class="magnify" style="float:right"><a href="/wiki/Image:Foobar.jpg" class="internal" title="Enlarge"><img src="/skins/common/images/magnify-clip.png" width="15" height="11" alt="Enlarge" /></a></div>Please <a href="mailto:nobody@example.com" class="external free" title="mailto:nobody@example.com" rel="nofollow">mailto:nobody@example.com</a></div></div></div> + +!! end + +!! test +BUG 1887: A <math> with a thumbnail- we don't render math in the parsertests by default, +so math is not stripped and turns up as escaped <math> tags. +!! input +[[Image:foobar.jpg|thumb|<math>2+2</math>]] +!! 
result +<div class="thumb tright"><div style="width:182px;"><a href="/wiki/Image:Foobar.jpg" class="internal" title="<math>2+2</math>"><img src="http://example.com/images/thumb/3/3a/Foobar.jpg/180px-Foobar.jpg" alt="<math>2+2</math>" width="180" height="20" longdesc="/wiki/Image:Foobar.jpg" /></a> <div class="thumbcaption"><div class="magnify" style="float:right"><a href="/wiki/Image:Foobar.jpg" class="internal" title="Enlarge"><img src="/skins/common/images/magnify-clip.png" width="15" height="11" alt="Enlarge" /></a></div><math>2+2</math></div></div></div> + +!! end + +!! test +BUG 1887, part 2: A <math> with a thumbnail- math enabled +!! options +math +!! input +[[Image:foobar.jpg|thumb|<math>2+2</math>]] +!! result +<div class="thumb tright"><div style="width:182px;"><a href="/wiki/Image:Foobar.jpg" class="internal" title="2 + 2"><img src="http://example.com/images/thumb/3/3a/Foobar.jpg/180px-Foobar.jpg" alt="2 + 2" width="180" height="20" longdesc="/wiki/Image:Foobar.jpg" /></a> <div class="thumbcaption"><div class="magnify" style="float:right"><a href="/wiki/Image:Foobar.jpg" class="internal" title="Enlarge"><img src="/skins/common/images/magnify-clip.png" width="15" height="11" alt="Enlarge" /></a></div><span class="texhtml">2 + 2</span></div></div></div> + +!! end + +# Pending resolution to bug 368 +!! test +BUG 648: Frameless image caption with a link +!! input +[[Image:foobar.jpg|text with a [[link]] in it]] +!! result +<p><a href="/wiki/Image:Foobar.jpg" class="image" title="text with a link in it"><img src="http://example.com/images/3/3a/Foobar.jpg" alt="text with a link in it" width="1941" height="220" longdesc="/wiki/Image:Foobar.jpg" /></a> +</p> +!! end + +!! test +BUG 648: Frameless image caption with a link (suffix) +!! input +[[Image:foobar.jpg|text with a [[link]]foo in it]] +!! 
result +<p><a href="/wiki/Image:Foobar.jpg" class="image" title="text with a linkfoo in it"><img src="http://example.com/images/3/3a/Foobar.jpg" alt="text with a linkfoo in it" width="1941" height="220" longdesc="/wiki/Image:Foobar.jpg" /></a> +</p> +!! end + +!! test +BUG 648: Frameless image caption with an interwiki link +!! input +[[Image:foobar.jpg|text with a [[MeatBall:Link]] in it]] +!! result +<p><a href="/wiki/Image:Foobar.jpg" class="image" title="text with a MeatBall:Link in it"><img src="http://example.com/images/3/3a/Foobar.jpg" alt="text with a MeatBall:Link in it" width="1941" height="220" longdesc="/wiki/Image:Foobar.jpg" /></a> +</p> +!! end + +!! test +BUG 648: Frameless image caption with a piped interwiki link +!! input +[[Image:foobar.jpg|text with a [[MeatBall:Link|link]] in it]] +!! result +<p><a href="/wiki/Image:Foobar.jpg" class="image" title="text with a link in it"><img src="http://example.com/images/3/3a/Foobar.jpg" alt="text with a link in it" width="1941" height="220" longdesc="/wiki/Image:Foobar.jpg" /></a> +</p> +!! end + +!! test +Escape HTML special chars in image alt text +!! input +[[Image:foobar.jpg|& < > "]] +!! result +<p><a href="/wiki/Image:Foobar.jpg" class="image" title="& < > ""><img src="http://example.com/images/3/3a/Foobar.jpg" alt="& < > "" width="1941" height="220" longdesc="/wiki/Image:Foobar.jpg" /></a> +</p> +!! end + +!! test +BUG 499: Alt text should have Ӓ, not &1234; +!! input +[[Image:foobar.jpg|♀]] +!! result +<p><a href="/wiki/Image:Foobar.jpg" class="image" title="♀"><img src="http://example.com/images/3/3a/Foobar.jpg" alt="♀" width="1941" height="220" longdesc="/wiki/Image:Foobar.jpg" /></a> +</p> +!! end + +!! test +Broken image caption with link +!! input +[[Image:Foobar.jpg|thumb|This is a broken caption. But [[Main Page|this]] is just an ordinary link. +!! result +<p>[[Image:Foobar.jpg|thumb|This is a broken caption. But <a href="/wiki/Main_Page" title="Main Page">this</a> is just an ordinary link. 
+</p> +!! end + +!! test +Image caption containing another image +!! input +[[Image:Foobar.jpg|thumb|This is a caption with another [[Image:icon.png|image]] inside it!]] +!! result +<div class="thumb tright"><div style="width:182px;"><a href="/wiki/Image:Foobar.jpg" class="internal" title="This is a caption with another Image:Icon.png inside it!"><img src="http://example.com/images/thumb/3/3a/Foobar.jpg/180px-Foobar.jpg" alt="This is a caption with another Image:Icon.png inside it!" width="180" height="20" longdesc="/wiki/Image:Foobar.jpg" /></a> <div class="thumbcaption"><div class="magnify" style="float:right"><a href="/wiki/Image:Foobar.jpg" class="internal" title="Enlarge"><img src="/skins/common/images/magnify-clip.png" width="15" height="11" alt="Enlarge" /></a></div>This is a caption with another <a href="/index.php?title=Special:Upload&wpDestFile=Icon.png" class="new" title="Image:Icon.png">Image:Icon.png</a> inside it!</div></div></div> + +!! end + +!! test +Image caption containing a newline +!! input +[[Image:Foobar.jpg|This +*is some text]] +!! result +<p><a href="/wiki/Image:Foobar.jpg" class="image" title="This *is some text"><img src="http://example.com/images/3/3a/Foobar.jpg" alt="This *is some text" width="1941" height="220" longdesc="/wiki/Image:Foobar.jpg" /></a> +</p> +!!end + + +!! test +Bug 3090: External links other than http: in image captions +!! input +[[Image:Foobar.jpg|thumb|200px|This caption has [irc://example.net irc] and [https://example.com Secure] ext links in it.]] +!! result +<div class="thumb tright"><div style="width:202px;"><a href="/wiki/Image:Foobar.jpg" class="internal" title="This caption has irc and Secure ext links in it."><img src="http://example.com/images/thumb/3/3a/Foobar.jpg/200px-Foobar.jpg" alt="This caption has irc and Secure ext links in it." 
width="200" height="23" longdesc="/wiki/Image:Foobar.jpg" /></a> <div class="thumbcaption"><div class="magnify" style="float:right"><a href="/wiki/Image:Foobar.jpg" class="internal" title="Enlarge"><img src="/skins/common/images/magnify-clip.png" width="15" height="11" alt="Enlarge" /></a></div>This caption has <a href="irc://example.net" class="external text" title="irc://example.net" rel="nofollow">irc</a> and <a href="https://example.com" class="external text" title="https://example.com" rel="nofollow">Secure</a> ext links in it.</div></div></div> + +!! end + + +### +### Subpages +### +!! article +Subpage test/subpage +!! text +foo +!! endarticle + +!! test +Subpage link +!! options +subpage title=[[Subpage test]] +!! input +[[/subpage]] +!! result +<p><a href="/wiki/Subpage_test/subpage" title="Subpage test/subpage">/subpage</a> +</p> +!! end + +!! test +Subpage noslash link +!! options +subpage title=[[Subpage test]] +!!input +[[/subpage/]] +!! result +<p><a href="/wiki/Subpage_test/subpage" title="Subpage test/subpage">subpage</a> +</p> +!! end + +!! test +Disabled subpages +!! input +[[/subpage]] +!! result +<p><a href="/index.php?title=/subpage&action=edit" class="new" title="/subpage">/subpage</a> +</p> +!! end + +!! test +BUG 561: {{/Subpage}} +!! options +subpage title=[[Page]] +!! input +{{/Subpage}} +!! result +<p><a href="/index.php?title=Page/Subpage&action=edit" class="new" title="Page/Subpage">Page/Subpage</a> +</p> +!! end + +### +### Categories +### +!! article +Category:MediaWiki User's Guide +!! text +blah +!! endarticle + +!! test +Link to category +!! input +[[:Category:MediaWiki User's Guide]] +!! result +<p><a href="/wiki/Category:MediaWiki_User%27s_Guide" title="Category:MediaWiki User's Guide">Category:MediaWiki User's Guide</a> +</p> +!! end + +!! test +Simple category +!! options +cat +!! input +[[Category:MediaWiki User's Guide]] +!! 
result +<a href="/wiki/Category:MediaWiki_User%27s_Guide" title="Category:MediaWiki User's Guide">MediaWiki User's Guide</a> +!! end + +### +### Inter-language links +### +!! test +Inter-language links +!! options +ill +!! input +[[es:Alimento]] +[[fr:Nourriture]] +[[zh:食品]] +!! result +es:Alimento fr:Nourriture zh:食品 +!! end + +### +### Sections +### +!! test +Basic section headings +!! options +title=[[Parser test script]] +!! input +== Headline 1 == +Some text + +==Headline 2== +More +===Smaller headline=== +Blah blah +!! result +<div class="editsection" style="float:right;margin-left:5px;">[<a href="/index.php?title=Parser_test_script&action=edit&section=1" title="Edit section: Headline 1">edit</a>]</div><a name="Headline_1"></a><h2> Headline 1 </h2> +<p>Some text +</p> +<div class="editsection" style="float:right;margin-left:5px;">[<a href="/index.php?title=Parser_test_script&action=edit&section=2" title="Edit section: Headline 2">edit</a>]</div><a name="Headline_2"></a><h2>Headline 2</h2> +<p>More +</p> +<div class="editsection" style="float:right;margin-left:5px;">[<a href="/index.php?title=Parser_test_script&action=edit&section=3" title="Edit section: Smaller headline">edit</a>]</div><a name="Smaller_headline"></a><h3>Smaller headline</h3> +<p>Blah blah +</p> +!! end + +!! test +Section headings with TOC +!! options +title=[[Parser test script]] +!! input +== Headline 1 == +=== Subheadline 1 === +===== Skipping a level ===== +====== Skipping a level ====== + +== Headline 2 == +Some text +===Another headline=== +!! 
result +<table id="toc" class="toc" summary="Contents"><tr><td><div id="toctitle"><h2>Contents</h2></div> +<ul> +<li class="toclevel-1"><a href="#Headline_1"><span class="tocnumber">1</span> <span class="toctext">Headline 1</span></a> +<ul> +<li class="toclevel-2"><a href="#Subheadline_1"><span class="tocnumber">1.1</span> <span class="toctext">Subheadline 1</span></a> +<ul> +<li class="toclevel-3"><a href="#Skipping_a_level"><span class="tocnumber">1.1.1</span> <span class="toctext">Skipping a level</span></a> +<ul> +<li class="toclevel-4"><a href="#Skipping_a_level_2"><span class="tocnumber">1.1.1.1</span> <span class="toctext">Skipping a level</span></a></li> +</ul> +</li> +</ul> +</li> +</ul> +</li> +<li class="toclevel-1"><a href="#Headline_2"><span class="tocnumber">2</span> <span class="toctext">Headline 2</span></a> +<ul> +<li class="toclevel-2"><a href="#Another_headline"><span class="tocnumber">2.1</span> <span class="toctext">Another headline</span></a></li> +</ul> +</li> +</ul> +</td></tr></table><script type="text/javascript"> if (window.showTocToggle) { var tocShowText = "show"; var tocHideText = "hide"; showTocToggle(); } </script> +<div class="editsection" style="float:right;margin-left:5px;">[<a href="/index.php?title=Parser_test_script&action=edit&section=1" title="Edit section: Headline 1">edit</a>]</div><a name="Headline_1"></a><h2> Headline 1 </h2> +<div class="editsection" style="float:right;margin-left:5px;">[<a href="/index.php?title=Parser_test_script&action=edit&section=2" title="Edit section: Subheadline 1">edit</a>]</div><a name="Subheadline_1"></a><h3> Subheadline 1 </h3> +<div class="editsection" style="float:right;margin-left:5px;">[<a href="/index.php?title=Parser_test_script&action=edit&section=3" title="Edit section: Skipping a level">edit</a>]</div><a name="Skipping_a_level"></a><h5> Skipping a level </h5> +<div class="editsection" style="float:right;margin-left:5px;">[<a 
href="/index.php?title=Parser_test_script&action=edit&section=4" title="Edit section: Skipping a level">edit</a>]</div><a name="Skipping_a_level_2"></a><h6> Skipping a level </h6> +<div class="editsection" style="float:right;margin-left:5px;">[<a href="/index.php?title=Parser_test_script&action=edit&section=5" title="Edit section: Headline 2">edit</a>]</div><a name="Headline_2"></a><h2> Headline 2 </h2> +<p>Some text +</p> +<div class="editsection" style="float:right;margin-left:5px;">[<a href="/index.php?title=Parser_test_script&action=edit&section=6" title="Edit section: Another headline">edit</a>]</div><a name="Another_headline"></a><h3>Another headline</h3> + +!! end + +# perl -e 'print "="x$_," Level $_ heading","="x$_,"\n" for 1..10' +!! test +Handling of sections up to level 6 and beyond +!! input += Level 1 Heading= +== Level 2 Heading== +=== Level 3 Heading=== +==== Level 4 Heading==== +===== Level 5 Heading===== +====== Level 6 Heading====== +======= Level 7 Heading======= +======== Level 8 Heading======== +========= Level 9 Heading========= +========== Level 10 Heading========== +!! 
result +<table id="toc" class="toc" summary="Contents"><tr><td><div id="toctitle"><h2>Contents</h2></div> +<ul> +<li class="toclevel-1"><a href="#Level_1_Heading"><span class="tocnumber">1</span> <span class="toctext">Level 1 Heading</span></a> +<ul> +<li class="toclevel-2"><a href="#Level_2_Heading"><span class="tocnumber">1.1</span> <span class="toctext">Level 2 Heading</span></a> +<ul> +<li class="toclevel-3"><a href="#Level_3_Heading"><span class="tocnumber">1.1.1</span> <span class="toctext">Level 3 Heading</span></a> +<ul> +<li class="toclevel-4"><a href="#Level_4_Heading"><span class="tocnumber">1.1.1.1</span> <span class="toctext">Level 4 Heading</span></a> +<ul> +<li class="toclevel-5"><a href="#Level_5_Heading"><span class="tocnumber">1.1.1.1.1</span> <span class="toctext">Level 5 Heading</span></a> +<ul> +<li class="toclevel-6"><a href="#Level_6_Heading"><span class="tocnumber">1.1.1.1.1.1</span> <span class="toctext">Level 6 Heading</span></a></li> +<li class="toclevel-6"><a href="#.3D_Level_7_Heading.3D"><span class="tocnumber">1.1.1.1.1.2</span> <span class="toctext">= Level 7 Heading=</span></a></li> +<li class="toclevel-6"><a href="#.3D.3D_Level_8_Heading.3D.3D"><span class="tocnumber">1.1.1.1.1.3</span> <span class="toctext">== Level 8 Heading==</span></a></li> +<li class="toclevel-6"><a href="#.3D.3D.3D_Level_9_Heading.3D.3D.3D"><span class="tocnumber">1.1.1.1.1.4</span> <span class="toctext">=== Level 9 Heading===</span></a></li> +<li class="toclevel-6"><a href="#.3D.3D.3D.3D_Level_10_Heading.3D.3D.3D.3D"><span class="tocnumber">1.1.1.1.1.5</span> <span class="toctext">==== Level 10 Heading====</span></a></li> +</ul> +</li> +</ul> +</li> +</ul> +</li> +</ul> +</li> +</ul> +</li> +</ul> +</td></tr></table><script type="text/javascript"> if (window.showTocToggle) { var tocShowText = "show"; var tocHideText = "hide"; showTocToggle(); } </script> +<div class="editsection" style="float:right;margin-left:5px;">[<a 
href="/index.php?title=Parser_test&action=edit&section=1" title="Edit section: Level 1 Heading">edit</a>]</div><a name="Level_1_Heading"></a><h1> Level 1 Heading</h1> +<div class="editsection" style="float:right;margin-left:5px;">[<a href="/index.php?title=Parser_test&action=edit&section=2" title="Edit section: Level 2 Heading">edit</a>]</div><a name="Level_2_Heading"></a><h2> Level 2 Heading</h2> +<div class="editsection" style="float:right;margin-left:5px;">[<a href="/index.php?title=Parser_test&action=edit&section=3" title="Edit section: Level 3 Heading">edit</a>]</div><a name="Level_3_Heading"></a><h3> Level 3 Heading</h3> +<div class="editsection" style="float:right;margin-left:5px;">[<a href="/index.php?title=Parser_test&action=edit&section=4" title="Edit section: Level 4 Heading">edit</a>]</div><a name="Level_4_Heading"></a><h4> Level 4 Heading</h4> +<div class="editsection" style="float:right;margin-left:5px;">[<a href="/index.php?title=Parser_test&action=edit&section=5" title="Edit section: Level 5 Heading">edit</a>]</div><a name="Level_5_Heading"></a><h5> Level 5 Heading</h5> +<div class="editsection" style="float:right;margin-left:5px;">[<a href="/index.php?title=Parser_test&action=edit&section=6" title="Edit section: Level 6 Heading">edit</a>]</div><a name="Level_6_Heading"></a><h6> Level 6 Heading</h6> +<div class="editsection" style="float:right;margin-left:5px;">[<a href="/index.php?title=Parser_test&action=edit&section=7" title="Edit section: = Level 7 Heading=">edit</a>]</div><a name=".3D_Level_7_Heading.3D"></a><h6>= Level 7 Heading=</h6> +<div class="editsection" style="float:right;margin-left:5px;">[<a href="/index.php?title=Parser_test&action=edit&section=8" title="Edit section: == Level 8 Heading==">edit</a>]</div><a name=".3D.3D_Level_8_Heading.3D.3D"></a><h6>== Level 8 Heading==</h6> +<div class="editsection" style="float:right;margin-left:5px;">[<a href="/index.php?title=Parser_test&action=edit&section=9" title="Edit section: === Level 9 
Heading===">edit</a>]</div><a name=".3D.3D.3D_Level_9_Heading.3D.3D.3D"></a><h6>=== Level 9 Heading===</h6> +<div class="editsection" style="float:right;margin-left:5px;">[<a href="/index.php?title=Parser_test&action=edit&section=10" title="Edit section: ==== Level 10 Heading====">edit</a>]</div><a name=".3D.3D.3D.3D_Level_10_Heading.3D.3D.3D.3D"></a><h6>==== Level 10 Heading====</h6> + +!! end + +!! test +Resolving duplicate section names +!! options +title=[[Parser test script]] +!! input +== Foo bar == +== Foo bar == +!! result +<div class="editsection" style="float:right;margin-left:5px;">[<a href="/index.php?title=Parser_test_script&action=edit&section=1" title="Edit section: Foo bar">edit</a>]</div><a name="Foo_bar"></a><h2> Foo bar </h2> +<div class="editsection" style="float:right;margin-left:5px;">[<a href="/index.php?title=Parser_test_script&action=edit&section=2" title="Edit section: Foo bar">edit</a>]</div><a name="Foo_bar_2"></a><h2> Foo bar </h2> + +!! end + +!! article +Template:sections +!! text +===Section 1=== +==Section 2== +!! endarticle + +!! test +Template with sections, __NOTOC__ +!! options +title=[[Parser test script]] +!! input +__NOTOC__ +==Section 0== +{{sections}} +==Section 4== +!! 
result +<div class="editsection" style="float:right;margin-left:5px;">[<a href="/index.php?title=Parser_test_script&action=edit&section=1" title="Edit section: Section 0">edit</a>]</div><a name="Section_0"></a><h2>Section 0</h2> +<div class="editsection" style="float:right;margin-left:5px;">[<a href="/index.php?title=Template:Sections&action=edit&section=1" title="Template:Sections">edit</a>]</div><a name="Section_1"></a><h3>Section 1</h3> +<div class="editsection" style="float:right;margin-left:5px;">[<a href="/index.php?title=Template:Sections&action=edit&section=2" title="Template:Sections">edit</a>]</div><a name="Section_2"></a><h2>Section 2</h2> +<div class="editsection" style="float:right;margin-left:5px;">[<a href="/index.php?title=Parser_test_script&action=edit&section=2" title="Edit section: Section 4">edit</a>]</div><a name="Section_4"></a><h2>Section 4</h2> + +!! end + +!! test +__NOEDITSECTION__ keyword +!! input +__NOEDITSECTION__ +==Section 1== +==Section 2== +!! result +<a name="Section_1"></a><h2>Section 1</h2> +<a name="Section_2"></a><h2>Section 2</h2> + +!! end + +!! test +Link inside a section heading +!! options +title=[[Parser test script]] +!! input +==Section with a [[Main Page|link]] in it== +!! result +<div class="editsection" style="float:right;margin-left:5px;">[<a href="/index.php?title=Parser_test_script&action=edit&section=1" title="Edit section: Section with a link in it">edit</a>]</div><a name="Section_with_a_link_in_it"></a><h2>Section with a <a href="/wiki/Main_Page" title="Main Page">link</a> in it</h2> + +!! end + + +!! test +BUG 1219 URL next to image (good) +!! input +http://example.com [[Image:foobar.jpg]] +!! 
result +<p><a href="http://example.com" class="external free" title="http://example.com" rel="nofollow">http://example.com</a> <a href="/wiki/Image:Foobar.jpg" class="image" title="Image:foobar.jpg"><img src="http://example.com/images/3/3a/Foobar.jpg" alt="Image:foobar.jpg" width="1941" height="220" longdesc="/wiki/Image:Foobar.jpg" /></a> +</p> +!!end + +!! test +BUG 1219 URL next to image (broken) +!! input +http://example.com[[Image:foobar.jpg]] +!! result +<p><a href="http://example.com" class="external free" title="http://example.com" rel="nofollow">http://example.com</a><a href="/wiki/Image:Foobar.jpg" class="image" title="Image:foobar.jpg"><img src="http://example.com/images/3/3a/Foobar.jpg" alt="Image:foobar.jpg" width="1941" height="220" longdesc="/wiki/Image:Foobar.jpg" /></a> +</p> +!!end + +!! test +Bug 1186 news: in the middle of text +!! input +http://en.wikinews.org/wiki/Wikinews:Workplace +!! result +<p><a href="http://en.wikinews.org/wiki/Wikinews:Workplace" class="external free" title="http://en.wikinews.org/wiki/Wikinews:Workplace" rel="nofollow">http://en.wikinews.org/wiki/Wikinews:Workplace</a> +</p> +!!end + + +!! test +Namespaced link must have a title +!! input +[[Project:]] +!! result +<p>[[Project:]] +</p> +!!end + +!! test +Namespaced link must have a title (bad fragment version) +!! input +[[Project:#fragment]] +!! result +<p>[[Project:#fragment]] +</p> +!!end + + +!! test +div with no attributes +!! input +<div>HTML rocks</div> +!! result +<div>HTML rocks</div> + +!! end + +!! test +div with double-quoted attribute +!! input +<div id="rock">HTML rocks</div> +!! result +<div id="rock">HTML rocks</div> + +!! end + +!! test +div with single-quoted attribute +!! input +<div id='rock'>HTML rocks</div> +!! result +<div id="rock">HTML rocks</div> + +!! end + +!! test +div with unquoted attribute +!! input +<div id=rock>HTML rocks</div> +!! result +<div id="rock">HTML rocks</div> + +!! end + +!! test +div with illegal double attributes +!! 
input +<div align="center" align="right">HTML rocks</div> +!! result +<div align="right">HTML rocks</div> + +!!end + +!! test +HTML multiple attributes correction +!! input +<p class="error" class="awesome">Awesome!</p> +!! result +<p class="awesome">Awesome!</p> + +!!end + +!! test +Table multiple attributes correction +!! input +{| +!+ class="error" class="awesome"| status +|} +!! result +<table> +<tr> +<th class="awesome"> status +</th></tr></table> + +!!end + +!! test +DIV IN UPPERCASE +!! input +<DIV ALIGN="center">HTML ROCKS</DIV> +!! result +<div align="center">HTML ROCKS</div> + +!!end + + +!! test +text with amp in the middle of nowhere +!! input +Remember AT&T? +!!result +<p>Remember AT&T? +</p> +!! end + +!! test +text with character entity: eacute +!! input +I always thought é was a cute letter. +!! result +<p>I always thought é was a cute letter. +</p> +!! end + +!! test +text with undefined character entity: xacute +!! input +I always thought &xacute; was a cute letter. +!! result +<p>I always thought &xacute; was a cute letter. +</p> +!! end + + +### +### Media links +### + +!! test +Media link +!! input +[[Media:Foobar.jpg]] +!! result +<p><a href="http://example.com/images/3/3a/Foobar.jpg" class="internal" title="Foobar.jpg">Media:Foobar.jpg</a> +</p> +!! end + +!! test +Media link with text +!! input +[[Media:Foobar.jpg|A neat file to look at]] +!! result +<p><a href="http://example.com/images/3/3a/Foobar.jpg" class="internal" title="Foobar.jpg">A neat file to look at</a> +</p> +!! end + +# FIXME: this is still bad HTML tag nesting +!! test +Media link with nasty text +fixme: doBlockLevels won't wrap this in a paragraph because it contains a div +!! input +[[Media:Foobar.jpg|Safe Link<div style=display:none>" onmouseover="alert(document.cookie)" onfoo="</div>]] +!! 
result +<a href="http://example.com/images/3/3a/Foobar.jpg" class="internal" title="Foobar.jpg">Safe Link<div style="display:none">" onmouseover="alert(document.cookie)" onfoo="</div></a> + +!! end + +!! test +Media link to nonexistent file (bug 1702) +!! input +[[Media:No such.jpg]] +!! result +<p><a href="/index.php?title=Special:Upload&wpDestFile=No_such.jpg" class="new" title="No such.jpg">Media:No such.jpg</a> +</p> +!! end + +!! test +Image link to nonexistent file (bug 1850 - good) +!! input +[[Image:No such.jpg]] +!! result +<p><a href="/index.php?title=Special:Upload&wpDestFile=No_such.jpg" class="new" title="Image:No such.jpg">Image:No such.jpg</a> +</p> +!! end + +!! test +:Image link to nonexistent file (bug 1850 - bad) +!! input +[[:Image:No such.jpg]] +!! result +<p><a href="/index.php?title=Image:No_such.jpg&action=edit" class="new" title="Image:No such.jpg">Image:No such.jpg</a> +</p> +!! end + + + +!! test +Character reference normalization in link text (bug 1938) +!! input +[[Main Page|this&that]] +!! result +<p><a href="/wiki/Main_Page" title="Main Page">this&that</a> +</p> +!!end + +!! test +Empty attribute crash test (bug 2067) +!! input +<font color="">foo</font> +!! result +<p><font color="">foo</font> +</p> +!! end + +!! test +Empty attribute crash test single-quotes (bug 2067) +!! input +<font color=''>foo</font> +!! result +<p><font color="">foo</font> +</p> +!! end + +!! test +Attribute test: equals, then nothing +!! input +<font color=>foo</font> +!! result +<p><font>foo</font> +</p> +!! end + +!! test +Attribute test: unquoted value +!! input +<font color=x>foo</font> +!! result +<p><font color="x">foo</font> +</p> +!! end + +!! test +Attribute test: unquoted but illegal value (hash) +!! input +<font color=#x>foo</font> +!! result +<p><font color="#x">foo</font> +</p> +!! end + +!! test +Attribute test: no value +!! input +<font color>foo</font> +!! result +<p><font color="color">foo</font> +</p> +!! end + +!! 
test +Bug 2095: link with three closing brackets +!! input +[[Main Page]]] +!! result +<p><a href="/wiki/Main_Page" title="Main Page">Main Page</a>] +</p> +!! end + +!! test +Bug 2095: link with pipe and three closing brackets +!! input +[[Main Page|link]]] +!! result +<p><a href="/wiki/Main_Page" title="Main Page">link</a>] +</p> +!! end + +!! test +Bug 2095: link with pipe and three closing brackets, version 2 +!! input +[[Main Page|[http://example.com/]]] +!! result +<p><a href="/wiki/Main_Page" title="Main Page">[http://example.com/]</a> +</p> +!! end + + +### +### Safety +### + +!! article +Template:Dangerous attribute +!! text +" onmouseover="alert(document.cookie) +!! endarticle + +!! article +Template:Dangerous style attribute +!! text +border-size: expression(alert(document.cookie)) +!! endarticle + +!! article +Template:Div style +!! text +<div style="float: right; {{{1}}}">Magic div</div> +!! endarticle + +!! test +Bug 2304: HTML attribute safety (safe template; regression bug 2309) +!! input +<div title="{{test}}"></div> +!! result +<div title="This is a test template"></div> + +!! end + +!! test +Bug 2304: HTML attribute safety (dangerous template; 2309) +!! input +<div title="{{dangerous attribute}}"></div> +!! result +<div title=""></div> + +!! end + +!! test +Bug 2304: HTML attribute safety (dangerous style template; 2309) +!! input +<div style="{{dangerous style attribute}}"></div> +!! result +<div></div> + +!! end + +!! test +Bug 2304: HTML attribute safety (safe parameter; 2309) +!! input +{{div style|width: 200px}} +!! result +<div style="float: right; width: 200px">Magic div</div> + +!! end + +!! test +Bug 2304: HTML attribute safety (unsafe parameter; 2309) +!! input +{{div style|width: expression(alert(document.cookie))}} +!! result +<div>Magic div</div> + +!! end + +!! test +Bug 2304: HTML attribute safety (unsafe breakout parameter; 2309) +!! input +{{div style|"><script>alert(document.cookie)</script>}} +!! 
result +<div>Magic div</div> + +!! end + +!! test +Bug 2304: HTML attribute safety (unsafe breakout parameter 2; 2309) +!! input +{{div style|" ><script>alert(document.cookie)</script>}} +!! result +<div style="float: right;">Magic div</div> + +!! end + +!! test +Bug 2304: HTML attribute safety (link) +!! input +<div title="[[Main Page]]"></div> +!! result +<div title="[[Main Page]]"></div> + +!! end + +!! test +Bug 2304: HTML attribute safety (italics) +!! input +<div title="''foobar''"></div> +!! result +<div title="''foobar''"></div> + +!! end + +!! test +Bug 2304: HTML attribute safety (bold) +!! input +<div title="'''foobar'''"></div> +!! result +<div title="'''foobar'''"></div> + +!! end + + +!! test +Bug 2304: HTML attribute safety (ISBN) +!! input +<div title="ISBN 1234567890"></div> +!! result +<div title="ISBN 1234567890"></div> + +!! end + +!! test +Bug 2304: HTML attribute safety (RFC) +!! input +<div title="RFC 1234"></div> +!! result +<div title="RFC 1234"></div> + +!! end + +!! test +Bug 2304: HTML attribute safety (PMID) +!! input +<div title="PMID 1234567890"></div> +!! result +<div title="PMID 1234567890"></div> + +!! end + +!! test +Bug 2304: HTML attribute safety (web link) +!! input +<div title="http://example.com/"></div> +!! result +<div title="http://example.com/"></div> + +!! end + +!! test +Bug 2304: HTML attribute safety (named web link) +!! input +<div title="[http://example.com/ link]"></div> +!! result +<div title="[http://example.com/ link]"></div> + +!! end + +!! test +Bug 3244: HTML attribute safety (extension; safe) +!! input +<div style="<nowiki>background:blue</nowiki>"></div> +!! result +<div style="background:blue"></div> + +!! end + +!! test +Bug 3244: HTML attribute safety (extension; unsafe) +!! input +<div style="<nowiki>border-left:expression(alert(document.cookie))</nowiki>"></div> +!! result +<div></div> + +!! end + +!! test +Math section safety when disabled +!! 
input +<math><script>alert(document.cookies);</script></math> +!! result +<p><math><script>alert(document.cookies);</script></math> +</p> +!! end + +# More MSIE fun discovered by Tom Gilder + +!! test +MSIE CSS safety test: spurious slash +!! input +<div style="background-image:u\rl(javascript:alert('boo'))">evil</div> +!! result +<div>evil</div> + +!! end + +!! test +MSIE CSS safety test: hex code +!! input +<div style="background-image:u\72l(javascript:alert('boo'))">evil</div> +!! result +<div>evil</div> + +!! end + +!! test +MSIE CSS safety test: comment in url +!! input +<div style="background-image:u/**/rl(javascript:alert('boo'))">evil</div> +!! result +<div style="background-image:u rl(javascript:alert('boo'))">evil</div> + +!! end + +!! test +MSIE CSS safety test: comment in expression +!! input +<div style="background-image:expres/**/sion(alert('boo4'))">evil4</div> +!! result +<div style="background-image:expres sion(alert('boo4'))">evil4</div> + +!! end + + +!! test +Table attribute legitimate extension +!! input +{| +!+ style="<nowiki>color:blue</nowiki>"| status +|} +!! result +<table> +<tr> +<th style="color:blue"> status +</th></tr></table> + +!!end + +!! test +Table attribute safety +!! input +{| +!+ style="<nowiki>border-width:expression(0+alert(document.cookie))</nowiki>"| status +|} +!! result +<table> +<tr> +<th> status +</th></tr></table> + +!! end + + +!! article +Template:Identity +!! text +{{{1}}} +!! endarticle + +!! test +Expansion of multi-line templates in attribute values (bug 6255) +!! input +<div style="background: {{identity|#00FF00}}">-</div> +!! result +<div style="background: #00FF00">-</div> + +!! end + + +!! test +Expansion of multi-line templates in attribute values (bug 6255 sanity check) +!! input +<div style="background: +#00FF00">-</div> +!! result +<div style="background: #00FF00">-</div> + +!! end + +!! test +Expansion of multi-line templates in attribute values (bug 6255 sanity check) +!! 
input +<div style="background: #00FF00">-</div> +!! result +<div style="background: #00FF00">-</div> + +!! end + +### +### Parser hooks (see maintenance/parserTestsParserHook.php for the <tag> extension) +### +!! test +Parser hook: empty input +!! input +<tag></tag> +!! result +<pre> +string(0) "" +array(0) { +} +</pre> + +!! end + +!! test +Parser hook: empty input using terminated empty elements +!! input +<tag/> +!! result +<pre> +NULL +array(0) { +} +</pre> + +!! end + +!! test +Parser hook: empty input using terminated empty elements (space before) +!! input +<tag /> +!! result +<pre> +NULL +array(0) { +} +</pre> + +!! end + +!! test +Parser hook: basic input +!! input +<tag>input</tag> +!! result +<pre> +string(5) "input" +array(0) { +} +</pre> + +!! end + + +!! test +Parser hook: case insensetive +!! input +<TAG>input</TAG> +!! result +<pre> +string(5) "input" +array(0) { +} +</pre> + +!! end + + +!! test +Parser hook: case insensetive, redux +!! input +<TaG>input</TAg> +!! result +<pre> +string(5) "input" +array(0) { +} +</pre> + +!! end + +!! test +Parser hook: nested tags +!! options +noxml +!! input +<tag><tag></tag></tag> +!! result +<pre> +string(5) "<tag>" +array(0) { +} +</pre></tag> + +!! end + +!! test +Parser hook: basic arguments +!! input +<tag width=200 height = "100" depth = '50' square></tag> +!! result +<pre> +string(0) "" +array(4) { + ["width"]=> + string(3) "200" + ["height"]=> + string(3) "100" + ["depth"]=> + string(2) "50" + ["square"]=> + string(6) "square" +} +</pre> + +!! end + +!! test +Parser hook: argument containing a forward slash (bug 5344) +!! input +<tag filename='/tmp/bla'></tag> +!! result +<pre> +string(0) "" +array(1) { + ["filename"]=> + string(8) "/tmp/bla" +} +</pre> + +!! end + +!! test +Parser hook: empty input using terminated empty elements (bug 2374) +!! input +<tag foo=bar/>text +!! result +<pre> +NULL +array(1) { + ["foo"]=> + string(3) "bar" +} +</pre>text + +!! 
end + +# </tag> should be output literally since there is no matching tag that begins it +!! test +Parser hook: basic arguments using terminated empty elements (bug 2374) +!! input +<tag width=200 height = "100" depth = '50' square/> +other stuff +</tag> +!! result +<pre> +NULL +array(4) { + ["width"]=> + string(3) "200" + ["height"]=> + string(3) "100" + ["depth"]=> + string(2) "50" + ["square"]=> + string(6) "square" +} +</pre> +<p>other stuff +</tag> +</p> +!! end + +### +### (see maintenance/parserTestsStaticParserHook.php for the <statictag> extension) +### + +!! test +Parser hook: static parser hook not inside a comment +!! input +<statictag>hello, world</statictag> +<statictag action=flush/> +!! result +<p>hello, world +</p> +!! end + + +!! test +Parser hook: static parser hook inside a comment +!! input +<!-- <statictag>hello, world</statictag> --> +<statictag action=flush/> +!! result +<p><br /> +</p> +!! end + +# Nested template calls; this case was broken by Parser.php rev 1.506, +# since reverted. + +!! article +Template:One-parameter +!! text +(My parameter is: {{{1}}}) +!! endarticle + +!! article +Template:Map-one-parameter +!! text +{{{{{1}}}|{{{2}}}}} +!! endarticle + +!! test +Nested template calls +!! input +{{Map-one-parameter|One-parameter|param}} +!! result +<p>(My parameter is: param) +</p> +!! end + + +### +### Sanitizer +### +!! test +Sanitizer: Closing of open tags +!! input +<s></s><table></table> +!! result +<s></s><table></table> + +!! end + +!! test +Sanitizer: Closing of open but not closed tags +!! input +<s>foo +!! result +<p><s>foo</s> +</p> +!! end + +!! test +Sanitizer: Closing of closed but not open tags +!! input +</s> +!! result +<p></s> +</p> +!! end + +!! test +Sanitizer: Closing of closed but not open table tags +!! input +Table not started</td></tr></table> +!! result +<p>Table not started</td></tr></table> +</p> +!! end + +!! test +Sanitizer: Escaping of spaces, multibyte characters, colons & other stuff in id="" +!! 
input +<span id="æ: v">byte</span>[[#æ: v|backlink]] +!! result +<p><span id=".C3.A6:_v">byte</span><a href="#.C3.A6:_v" title="">backlink</a> +</p> +!! end + +!! test +Sanitizer: Validating the contents of the id attribute (bug 4515) +!! options +disabled +!! input +<br id=9 /> +!! result +Something, but defenetly not <br id="9" />... +!! end + +!! test +Language converter: output gets cut off unexpectedly (bug 5757) +!! options +language=zh +!! input +this bit is safe: }- + +but if we add a conversion instance: -{zh-cn:xxx;zh-tw:yyy}- + +then we get cut off here: }- + +all additional text is vanished +!! result +<p>this bit is safe: }- +</p><p>but if we add a conversion instance: xxx +</p><p>then we get cut off here: }- +</p><p>all additional text is vanished +</p> +!! end + +!! test +Self closed html pairs (bug 5487) +!! options +!! input +<center><font id="bug" />Centered text</center> +<div><font id="bug2" />In div text</div> +!! result +<center><font id="bug" />Centered text</center> +<div><font id="bug2" />In div text</div> + +!! end + +# +# +# + +!! test +HTML bullet list, closed tags (bug 5497) +!! input +<ul> +<li>One</li> +<li>Two</li> +</ul> +!! result +<ul> +<li>One</li> +<li>Two</li> +</ul> + +!! end + +!! test +HTML bullet list, unclosed tags (bug 5497) +!! input +<ul> +<li>One +<li>Two +</ul> +!! result +<ul> +<li>One +</li><li>Two +</li></ul> + +!! end + +!! test +HTML ordered list, closed tags (bug 5497) +!! input +<ol> +<li>One</li> +<li>Two</li> +</ol> +!! result +<ol> +<li>One</li> +<li>Two</li> +</ol> + +!! end + +!! test +HTML ordered list, unclosed tags (bug 5497) +!! input +<ol> +<li>One +<li>Two +</ol> +!! result +<ol> +<li>One +</li><li>Two +</li></ol> + +!! end + +!! test +HTML nested bullet list, closed tags (bug 5497) +!! input +<ul> +<li>One</li> +<li>Two: +<ul> +<li>Sub-one</li> +<li>Sub-two</li> +</ul> +</li> +</ul> +!! result +<ul> +<li>One</li> +<li>Two: +<ul> +<li>Sub-one</li> +<li>Sub-two</li> +</ul> +</li> +</ul> + +!! 
end + +!! test +HTML nested bullet list, open tags (bug 5497) +!! input +<ul> +<li>One +<li>Two: +<ul> +<li>Sub-one +<li>Sub-two +</ul> +</ul> +!! result +<ul> +<li>One +</li><li>Two: +<ul> +<li>Sub-one +</li><li>Sub-two +</li></ul> +</li></ul> + +!! end + +!! test +HTML nested ordered list, closed tags (bug 5497) +!! input +<ol> +<li>One</li> +<li>Two: +<ol> +<li>Sub-one</li> +<li>Sub-two</li> +</ol> +</li> +</ol> +!! result +<ol> +<li>One</li> +<li>Two: +<ol> +<li>Sub-one</li> +<li>Sub-two</li> +</ol> +</li> +</ol> + +!! end + +!! test +HTML nested ordered list, open tags (bug 5497) +!! input +<ol> +<li>One +<li>Two: +<ol> +<li>Sub-one +<li>Sub-two +</ol> +</ol> +!! result +<ol> +<li>One +</li><li>Two: +<ol> +<li>Sub-one +</li><li>Sub-two +</li></ol> +</li></ol> + +!! end + +!! test +HTML ordered list item with parameters oddity +!! input +<ol><li id="fragment">One</li></ol> +!! result +<ol><li id="fragment">One</li></ol> + +!! end + +!!test +bug 5918: autonumbering +!! input +[http://first/] [http://second] [ftp://ftp] + +ftp://inlineftp + +[mailto:enclosed@mail.tld With target] + +[mailto:enclosed@mail.tld] + +mailto:inline@mail.tld +!! result +<p><a href="http://first/" class="external autonumber" title="http://first/" rel="nofollow">[1]</a> <a href="http://second" class="external autonumber" title="http://second" rel="nofollow">[2]</a> <a href="ftp://ftp" class="external autonumber" title="ftp://ftp" rel="nofollow">[3]</a> +</p><p><a href="ftp://inlineftp" class="external free" title="ftp://inlineftp" rel="nofollow">ftp://inlineftp</a> +</p><p><a href="mailto:enclosed@mail.tld" class="external text" title="mailto:enclosed@mail.tld" rel="nofollow">With target</a> +</p><p><a href="mailto:enclosed@mail.tld" class="external autonumber" title="mailto:enclosed@mail.tld" rel="nofollow">[4]</a> +</p><p><a href="mailto:inline@mail.tld" class="external free" title="mailto:inline@mail.tld" rel="nofollow">mailto:inline@mail.tld</a> +</p> +!! 
end + + +# +# Security and HTML correctness +# From Nick Jenkins' fuzz testing +# + +!! test +Fuzz testing: Parser13 +!! input +{| +| http://a| +!! result +<table> +<tr> +<td> +</td> +</tr> +</table> + +!! end + +!! test +Fuzz testing: Parser14 +!! input +== onmouseover= == +http://__TOC__ +!! result +<div class="editsection" style="float:right;margin-left:5px;">[<a href="/index.php?title=Parser_test&action=edit&section=1" title="Edit section: onmouseover=">edit</a>]</div><a name="onmouseover.3D"></a><h2> onmouseover= </h2> +http://<table id="toc" class="toc" summary="Contents"><tr><td><div id="toctitle"><h2>Contents</h2></div> +<ul> +<li class="toclevel-1"><a href="#onmouseover.3D"><span class="tocnumber">1</span> <span class="toctext">onmouseover=</span></a></li> +</ul> +</td></tr></table><script type="text/javascript"> if (window.showTocToggle) { var tocShowText = "show"; var tocHideText = "hide"; showTocToggle(); } </script> + +!! end + +!! test +Fuzz testing: Parser14-table +!! input +==a== +{| STYLE=__TOC__ +!! result +<div class="editsection" style="float:right;margin-left:5px;">[<a href="/index.php?title=Parser_test&action=edit&section=1" title="Edit section: a">edit</a>]</div><a name="a"></a><h2>a</h2> +<table style="__TOC__"> +<tr><td></td></tr> +</table> + +!! end + +# Known to produce bogus xml (extra </td>) +!! test +Fuzz testing: Parser16 +!! options +noxml +!! input +{| +!https://|||||| +!! result +<table> +<tr> +<th>https://</th><th></th><th></th><th> +</td> +</tr> +</table> + +!! end + +!! test +Fuzz testing: Parser21 +!! input +{| +! irc://{{ftp://a" onmouseover="alert('hello world');" +| +!! result +<table> +<tr> +<th> <a href="irc://{{ftp://a" class="external free" title="irc://{{ftp://a" rel="nofollow">irc://{{ftp://a</a>" onmouseover="alert('hello world');" +</th><td> +</td> +</tr> +</table> + +!! end + +!! test +Fuzz testing: Parser22 +!! 
input +http://===r:::https://b + +{| +!!result +<p><a href="http://===r:::https://b" class="external free" title="http://===r:::https://b" rel="nofollow">http://===r:::https://b</a> +</p> +<table> +<tr><td></td></tr> +</table> + +!! end + +# Known to produce bad XML for now +!! test +Fuzz testing: Parser24 +!! options +noxml +!! input +{| +{{{| +<u CLASS= +| {{{{SSSll!!!!!!!VVVV)]]][[Special:*xxxxxxx--><noinclude>}}}} > +<br style="onmouseover='alert(document.cookie);' " /> + +MOVE YOUR MOUSE CURSOR OVER THIS TEXT +| +!! result +<table> + +<u class="|">} > +<br style="onmouseover='alert(document.cookie);'" /> + +MOVE YOUR MOUSE CURSOR OVER THIS TEXT +<tr> +<td></u> +</td> +</tr> +</table> + +!! end + +# Known to produce bad XML for now +!!test +Fuzz testing: Parser25 (bug 6055) +!! options +noxml +!! input +{{{ +| +<LI CLASS=|| + > +}}}blah" onmouseover="alert('hello world');" align="left"'''MOVE MOUSE CURSOR OVER HERE +!! result +<li class="||"> +blah" onmouseover="alert('hello world');" align="left"<b>MOVE MOUSE CURSOR OVER HERE</b> + +!! end + +!!test +Fuzz testing: URL adjacent extension (with space, clean) +!! options +!! input +http://example.com <nowiki>junk</nowiki> +!! result +<p><a href="http://example.com" class="external free" title="http://example.com" rel="nofollow">http://example.com</a> junk +</p> +!!end + +!!test +Fuzz testing: URL adjacent extension (no space, dirty; nowiki) +!! options +!! input +http://example.com<nowiki>junk</nowiki> +!! result +<p><a href="http://example.com" class="external free" title="http://example.com" rel="nofollow">http://example.com</a>junk +</p> +!!end + +!!test +Fuzz testing: URL adjacent extension (no space, dirty; pre) +!! options +!! input +http://example.com<pre>junk</pre> +!! 
result +<a href="http://example.com" class="external free" title="http://example.com" rel="nofollow">http://example.com</a><pre>junk</pre> + +!!end + +!!test +Fuzz testing: image with bogus manual thumbnail +!!input +[[Image:foobar.jpg|thumbnail= ]] +!!result +<div class="thumb tright"><div style="width:182px;"><a href="/wiki/Image:Foobar.jpg" class="internal" title=""><img src="http://example.com/images/3/3a/Foobar.jpg" alt="" width="180" height="-1" longdesc="/wiki/Image:Foobar.jpg" /></a> <div class="thumbcaption"><div class="magnify" style="float:right"><a href="/wiki/Image:Foobar.jpg" class="internal" title="Enlarge"><img src="/skins/common/images/magnify-clip.png" width="15" height="11" alt="Enlarge" /></a></div></div></div></div> + +!!end + +!! test +Fuzz testing: encoded newline in generated HTML replacements (bug 6577) +!! input +<pre dir=" "></pre> +!! result +<pre dir=" "></pre> + +!! end + +!! test +Parsing optional HTML elements (Bug 6171) +!! options +!! input +<table> + <tr> + <td> Some tabular data</td> + <td> More tabular data ... + <td> And yet som tabular data</td> + </tr> +</table> +!! result +<table> + <tr> + <td> Some tabular data</td> + <td> More tabular data ... + </td><td> And yet som tabular data</td> + </tr> +</table> + +!! end + +!! test +Correct handling of <td>, <tr> (Bug 6171) +!! options +!! input +<table> + <tr> + <td> Some tabular data</td> + <td> More tabular data ...</td> + <td> And yet som tabular data</td> + </tr> +</table> +!! result +<table> + <tr> + <td> Some tabular data</td> + <td> More tabular data ...</td> + <td> And yet som tabular data</td> + </tr> +</table> + +!! end + + +!! test +Parsing crashing regression (fr:JavaScript) +!! input +</body></x> +!! result +<p></body></x> +</p> +!! end + +!! test +Inline wiki vs wiki block nesting +!! input +'''Bold paragraph + +New wiki paragraph +!! result +<p><b>Bold paragraph</b> +</p><p>New wiki paragraph +</p> +!! end + +!! test +Inline HTML vs wiki block nesting +!! 
input +<b>Bold paragraph + +New wiki paragraph +!! result +<p><b>Bold paragraph</b> +</p><p>New wiki paragraph +</p> +!! end + + +!!test +Mixing markup for italics and bold +!! options +!! input +'''bold''''''bold''bolditalics''''' +!! result +<p><b>bold</b><b>bold<i>bolditalics</i></b> +</p> +!! end + + +!! article +Xyzzyx +!! text +Article for special page transclusion test +!! endarticle + +!! test +Special page transclusion +!! options +!! input +{{Special:Prefixindex/Xyzzyx}} +!! result +<p><br /> +</p> +<table style="background: inherit;" border="0" width="100%"><tr><td><a href="/wiki/Xyzzyx" title="Xyzzyx">Xyzzyx</a></td></tr></table> + +!! end + +!! test +Special page transclusion twice (bug 5021) +!! options +!! input +{{Special:Prefixindex/Xyzzyx}} +{{Special:Prefixindex/Xyzzyx}} +!! result +<p><br /> +</p> +<table style="background: inherit;" border="0" width="100%"><tr><td><a href="/wiki/Xyzzyx" title="Xyzzyx">Xyzzyx</a></td></tr></table> +<p><br /> +</p> +<table style="background: inherit;" border="0" width="100%"><tr><td><a href="/wiki/Xyzzyx" title="Xyzzyx">Xyzzyx</a></td></tr></table> + +!! end + +!! test +Invalid header with following text +!! input += x = y +!! result +<p>= x = y +</p> +!! end + + +!! test +Section extraction test (section 0) +!! options +section=0 +!! input +start +==a== +===aa=== +====aaa==== +==b== +===ba=== +===bb=== +====bba==== +===bc=== +==c== +===ca=== +!! result +start +!! end + +!! test +Section extraction test (section 1) +!! options +section=1 +!! input +start +==a== +===aa=== +====aaa==== +==b== +===ba=== +===bb=== +====bba==== +===bc=== +==c== +===ca=== +!! result +==a== +===aa=== +====aaa==== +!! end + +!! test +Section extraction test (section 2) +!! options +section=2 +!! input +start +==a== +===aa=== +====aaa==== +==b== +===ba=== +===bb=== +====bba==== +===bc=== +==c== +===ca=== +!! result +===aa=== +====aaa==== +!! end + +!! test +Section extraction test (section 3) +!! options +section=3 +!! 
input +start +==a== +===aa=== +====aaa==== +==b== +===ba=== +===bb=== +====bba==== +===bc=== +==c== +===ca=== +!! result +====aaa==== +!! end + +!! test +Section extraction test (section 4) +!! options +section=4 +!! input +start +==a== +===aa=== +====aaa==== +==b== +===ba=== +===bb=== +====bba==== +===bc=== +==c== +===ca=== +!! result +==b== +===ba=== +===bb=== +====bba==== +===bc=== +!! end + +!! test +Section extraction test (section 5) +!! options +section=5 +!! input +start +==a== +===aa=== +====aaa==== +==b== +===ba=== +===bb=== +====bba==== +===bc=== +==c== +===ca=== +!! result +===ba=== +!! end + +!! test +Section extraction test (section 6) +!! options +section=6 +!! input +start +==a== +===aa=== +====aaa==== +==b== +===ba=== +===bb=== +====bba==== +===bc=== +==c== +===ca=== +!! result +===bb=== +====bba==== +!! end + +!! test +Section extraction test (section 7) +!! options +section=7 +!! input +start +==a== +===aa=== +====aaa==== +==b== +===ba=== +===bb=== +====bba==== +===bc=== +==c== +===ca=== +!! result +====bba==== +!! end + +!! test +Section extraction test (section 8) +!! options +section=8 +!! input +start +==a== +===aa=== +====aaa==== +==b== +===ba=== +===bb=== +====bba==== +===bc=== +==c== +===ca=== +!! result +===bc=== +!! end + +!! test +Section extraction test (section 9) +!! options +section=9 +!! input +start +==a== +===aa=== +====aaa==== +==b== +===ba=== +===bb=== +====bba==== +===bc=== +==c== +===ca=== +!! result +==c== +===ca=== +!! end + +!! test +Section extraction test (section 10) +!! options +section=10 +!! input +start +==a== +===aa=== +====aaa==== +==b== +===ba=== +===bb=== +====bba==== +===bc=== +==c== +===ca=== +!! result +===ca=== +!! end + +!! test +Section extraction test (nonexistent section 11) +!! options +section=11 +!! input +start +==a== +===aa=== +====aaa==== +==b== +===ba=== +===bb=== +====bba==== +===bc=== +==c== +===ca=== +!! result +!! end + +!! test +Section extraction test with bogus heading (section 1) +!! 
options +section=1 +!! input +==a== +==bogus== not a legal section +==b== +!! result +==a== +==bogus== not a legal section +!! end + +!! test +Section extraction test with bogus heading (section 2) +!! options +section=2 +!! input +==a== +==bogus== not a legal section +==b== +!! result +==b== +!! end + +!! test +Section extraction test with comment after heading (section 1) +!! options +section=1 +!! input +==a== +==legal== <!-- a legal section --> +==b== +!! result +==a== +!! end + +!! test +Section extraction test with comment after heading (section 2) +!! options +section=2 +!! input +==a== +==legal== <!-- a legal section --> +==b== +!! result +==legal== <!-- a legal section --> +!! end + +!! test +Section extraction test with bogus <nowiki> heading (section 1) +!! options +section=1 +!! input +==a== +==bogus== <nowiki>not a legal section</nowiki> +==b== +!! result +==a== +==bogus== <nowiki>not a legal section</nowiki> +!! end + +!! test +Section extraction test with bogus <nowiki> heading (section 2) +!! options +section=2 +!! input +==a== +==bogus== <nowiki>not a legal section</nowiki> +==b== +!! result +==b== +!! end + + +!! test +Section extraction prefixed by comment (section 1) (bug 2587) +!! options +section=1 +!! input +<!-- -->==sec1== +==sec2== +!!result +<!-- -->==sec1== +!!end + +!! test +Section extraction prefixed by comment (section 2) (bug 2587) +!! options +section=2 +!! input +<!-- -->==sec1== +==sec2== +!!result +==sec2== +!!end + + +!! test +Section extraction, mixed wiki and html (section 1) (bug 2607) +!! options +section=1 +!! input +<h2>1</h2> +one +==2== +two +==3== +three +!! result +<h2>1</h2> +one +!! end + +!! test +Section extraction, mixed wiki and html (section 2) (bug 2607) +!! options +section=2 +!! input +<h2>1</h2> +one +==2== +two +==3== +three +!! result +==2== +two +!! end + + +!! test +Section extraction, heading surrounded by <noinclude> (bug 3342) +!! options +section=1 +!! input +<noinclude>==a==</noinclude> +text +!! 
result +<noinclude>==a==</noinclude> +text +!!end + + +!! test +Section extraction, HTML heading subsections (bug 5272) +!! options +section=1 +!! input +<h2>a</h2> +<h3>aa</h3> +<h2>b</h2> +!! result +<h2>a</h2> +<h3>aa</h3> +!! end + +!! test +Section extraction, HTML headings should be ignored in extensions (bug 3476) +!! options +section=2 +!! input +<h2>a</h2> +<tag> +<h2>not b</h2> +</tag> +<h2>b</h2> +!! result +<h2>b</h2> +!! end + +!! test +Section replacement test (section 0) +!! options +replace=0,"xxx" +!! input +start +==a== +===aa=== +====aaa==== +==b== +===ba=== +===bb=== +====bba==== +===bc=== +==c== +===ca=== +!! result +xxx + +==a== +===aa=== +====aaa==== +==b== +===ba=== +===bb=== +====bba==== +===bc=== +==c== +===ca=== +!! end + +!! test +Section replacement test (section 1) +!! options +replace=1,"xxx" +!! input +start +==a== +===aa=== +====aaa==== +==b== +===ba=== +===bb=== +====bba==== +===bc=== +==c== +===ca=== +!! result +start +xxx + +==b== +===ba=== +===bb=== +====bba==== +===bc=== +==c== +===ca=== +!! end + +!! test +Section replacement test (section 2) +!! options +replace=2,"xxx" +!! input +start +==a== +===aa=== +====aaa==== +==b== +===ba=== +===bb=== +====bba==== +===bc=== +==c== +===ca=== +!! result +start +==a== +xxx + +==b== +===ba=== +===bb=== +====bba==== +===bc=== +==c== +===ca=== +!! end + +!! test +Section replacement test (section 3) +!! options +replace=3,"xxx" +!! input +start +==a== +===aa=== +====aaa==== +==b== +===ba=== +===bb=== +====bba==== +===bc=== +==c== +===ca=== +!! result +start +==a== +===aa=== +xxx + +==b== +===ba=== +===bb=== +====bba==== +===bc=== +==c== +===ca=== +!! end + +!! test +Section replacement test (section 4) +!! options +replace=4,"xxx" +!! input +start +==a== +===aa=== +====aaa==== +==b== +===ba=== +===bb=== +====bba==== +===bc=== +==c== +===ca=== +!! result +start +==a== +===aa=== +====aaa==== +xxx + +==c== +===ca=== +!! end + +!! test +Section replacement test (section 5) +!! 
options +replace=5,"xxx" +!! input +start +==a== +===aa=== +====aaa==== +==b== +===ba=== +===bb=== +====bba==== +===bc=== +==c== +===ca=== +!! result +start +==a== +===aa=== +====aaa==== +==b== +xxx + +===bb=== +====bba==== +===bc=== +==c== +===ca=== +!! end + +!! test +Section replacement test (section 6) +!! options +replace=6,"xxx" +!! input +start +==a== +===aa=== +====aaa==== +==b== +===ba=== +===bb=== +====bba==== +===bc=== +==c== +===ca=== +!! result +start +==a== +===aa=== +====aaa==== +==b== +===ba=== +xxx + +===bc=== +==c== +===ca=== +!! end + +!! test +Section replacement test (section 7) +!! options +replace=7,"xxx" +!! input +start +==a== +===aa=== +====aaa==== +==b== +===ba=== +===bb=== +====bba==== +===bc=== +==c== +===ca=== +!! result +start +==a== +===aa=== +====aaa==== +==b== +===ba=== +===bb=== +xxx + +===bc=== +==c== +===ca=== +!! end + +!! test +Section replacement test (section 8) +!! options +replace=8,"xxx" +!! input +start +==a== +===aa=== +====aaa==== +==b== +===ba=== +===bb=== +====bba==== +===bc=== +==c== +===ca=== +!! result +start +==a== +===aa=== +====aaa==== +==b== +===ba=== +===bb=== +====bba==== +xxx + +==c== +===ca=== +!!end + +!! test +Section replacement test (section 9) +!! options +replace=9,"xxx" +!! input +start +==a== +===aa=== +====aaa==== +==b== +===ba=== +===bb=== +====bba==== +===bc=== +==c== +===ca=== +!! result +start +==a== +===aa=== +====aaa==== +==b== +===ba=== +===bb=== +====bba==== +===bc=== +xxx +!! end + +!! test +Section replacement test (section 10) +!! options +replace=10,"xxx" +!! input +start +==a== +===aa=== +====aaa==== +==b== +===ba=== +===bb=== +====bba==== +===bc=== +==c== +===ca=== +!! result +start +==a== +===aa=== +====aaa==== +==b== +===ba=== +===bb=== +====bba==== +===bc=== +==c== +xxx +!! end + + +!! test +Section extraction, HTML headings not at line boundaries (section 0) +!! options +section=0 +!! 
input +<h2>Evil</h2><i>blah blah blah</i> + +evil blah + +<h2>Nice</h2> + +nice blah + +<i>extra evil</i><h2>Extra nasty</h2> + +extra nasty +!! result +!! end + +!! test +Section extraction, HTML headings not at line boundaries (section 1) +!! options +section=1 +!! input +<h2>Evil</h2><i>blah blah blah</i> + +evil blah + +<h2>Nice</h2> + +nice blah + +<i>extra evil</i><h2>Extra nasty</h2> + +extra nasty +!! result +<h2>Evil</h2><i>blah blah blah</i> + +evil blah +!! end + +!! test +Section extraction, HTML headings not at line boundaries (section 2) +!! options +section=2 +!! input +<h2>Evil</h2><i>blah blah blah</i> + +evil blah + +<h2>Nice</h2> + +nice blah + +<i>extra evil</i><h2>Extra nasty</h2> + +extra nasty +!! result +<h2>Nice</h2> + +nice blah + +<i>extra evil</i> +!! end + +!! test +Section extraction, HTML headings not at line boundaries (section 3) +!! options +section=3 +!! input +<h2>Evil</h2><i>blah blah blah</i> + +evil blah + +<h2>Nice</h2> + +nice blah + +<i>extra evil</i><h2>Extra nasty</h2> + +extra nasty +!! result +<h2>Extra nasty</h2> + +extra nasty +!! end + + +!! test +Section extraction, heading followed by pre with 20 spaces (bug 6398) +!! options +section=1 +!! input +==a== + a +!! result +==a== + a +!! end + +!! test +Section extraction, heading followed by pre with 19 spaces (bug 6398 sanity check) +!! options +section=1 +!! input +==a== + a +!! result +==a== + a +!! end + +!! test +Handling of 
 in URLs +!! input +**irc://
a +!! result +<ul><li><ul><li><a href="irc://%0Aa" class="external free" title="irc://%0Aa" rel="nofollow">irc://%0Aa</a> +</li></ul> +</li></ul> + +!!end +# +# +# + +TODO: +more images +more tables +math +character entities +and much more diff --git a/maintenance/parserTestsParserHook.php b/maintenance/parserTestsParserHook.php new file mode 100644 index 00000000..65e41aae --- /dev/null +++ b/maintenance/parserTestsParserHook.php @@ -0,0 +1,34 @@ +<?php +if ( ! defined( 'MEDIAWIKI' ) ) + die( -1 ); +/** + * A basic extension that's used by the parser tests to test whether input and + * arguments are passed to extensions properly. + * + * @package MediaWiki + * @subpackage Maintenance + * + * @author Ævar Arnfjörð Bjarmason <avarab@gmail.com> + * @copyright Copyright © 2005, 2006 Ævar Arnfjörð Bjarmason + * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later + */ + +$wgHooks['ParserTestParser'][] = 'wfParserTestParserHookSetup'; + +function wfParserTestParserHookSetup( &$parser ) { + $parser->setHook( 'tag', 'wfParserTestParserHookHook' ); + + return true; +} + +function wfParserTestParserHookHook( $in, $argv ) { + ob_start(); + var_dump( + $in, + $argv + ); + $ret = ob_get_clean(); + + return "<pre>\n$ret</pre>"; +} +?> diff --git a/maintenance/parserTestsParserTime.php b/maintenance/parserTestsParserTime.php new file mode 100644 index 00000000..705f9ce7 --- /dev/null +++ b/maintenance/parserTestsParserTime.php @@ -0,0 +1,26 @@ +<?php +if ( ! defined( 'MEDIAWIKI' ) ) + die( -1 ); +/** + * A basic extension that's used by the parser tests to test date magic words + * + * Handy so that we don't have to upgrade the parsertests every second to + * compensate with the passage of time and certainly less expensive than a + * time-freezing device, get yours now! 
+ * + * @package MediaWiki + * @subpackage Maintenance + * + * @author Ævar Arnfjörð Bjarmason <avarab@gmail.com> + * @copyright Copyright © 2005, 2006 Ævar Arnfjörð Bjarmason + * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later + */ + +$wgHooks['ParserGetVariableValueTs'][] = 'wfParserTimeSetup'; + +function wfParserTimeSetup( &$parser, &$ts ) { + $ts = 123; //$ perl -le 'print scalar localtime 123' ==> Thu Jan 1 00:02:03 1970 + + return true; +} +?> diff --git a/maintenance/parserTestsStaticParserHook.php b/maintenance/parserTestsStaticParserHook.php new file mode 100644 index 00000000..ac365aca --- /dev/null +++ b/maintenance/parserTestsStaticParserHook.php @@ -0,0 +1,44 @@ +<?php +if ( ! defined( 'MEDIAWIKI' ) ) + die( -1 ); +/** + * A basic extension that's used by the parser tests to test whether the parser + * calls extensions when they're called inside comments, it shouldn't do that + * + * @package MediaWiki + * @subpackage Maintenance + * + * @author Ævar Arnfjörð Bjarmason <avarab@gmail.com> + * @copyright Copyright © 2005, 2006 Ævar Arnfjörð Bjarmason + * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later + */ + +$wgHooks['ParserTestParser'][] = 'wfParserTestStaticParserHookSetup'; + +function wfParserTestStaticParserHookSetup( &$parser ) { + $parser->setHook( 'statictag', 'wfParserTestStaticParserHookHook' ); + + return true; +} + +function wfParserTestStaticParserHookHook( $in, $argv ) { + static $buf = null; + + if ( ! count( $argv ) ) { + $buf = $in; + return ''; + } else if ( count( $argv ) === 1 && $argv['action'] === 'flush' && $in === null ) { + // Clear the buffer, we probably don't need to + $tmp = $buf; + $buf = null; + return $tmp; + } else + // wtf? + die( + "\nCall this extension as <statictag>string</statictag> or as" . + " <statictag action=flush/>, not in any other way.\n" . + "text: " . var_export( $in, true ) . "\n" . + "argv: " . var_export( $argv, true ) . 
"\n" + ); +} +?> diff --git a/maintenance/postgres/tables.sql b/maintenance/postgres/tables.sql new file mode 100644 index 00000000..5481a394 --- /dev/null +++ b/maintenance/postgres/tables.sql @@ -0,0 +1,420 @@ +-- SQL to create the initial tables for the MediaWiki database. +-- This is read and executed by the install script; you should +-- not have to run it by itself unless doing a manual install. +-- This is the PostgreSQL version. +-- For information about each table, please see the notes in maintenance/tables.sql +-- Please make sure all dollar-quoting uses $mw$ at the start of the line +-- We can't use SERIAL everywhere: the sequence names are hard-coded into the PHP +-- TODO: Change CHAR to BOOL + +BEGIN; +SET client_min_messages = 'ERROR'; + +CREATE SEQUENCE user_user_id_seq MINVALUE 0 START WITH 0; +CREATE TABLE "user" ( + user_id INTEGER NOT NULL PRIMARY KEY DEFAULT nextval('user_user_id_seq'), + user_name TEXT NOT NULL UNIQUE, + user_real_name TEXT, + user_password TEXT, + user_newpassword TEXT, + user_token CHAR(32), + user_email TEXT, + user_email_token CHAR(32), + user_email_token_expires TIMESTAMPTZ, + user_email_authenticated TIMESTAMPTZ, + user_options TEXT, + user_touched TIMESTAMPTZ, + user_registration TIMESTAMPTZ +); +CREATE INDEX user_email_token_idx ON "user" (user_email_token); + +-- Create a dummy user to satisfy fk constraints especially with revisions +INSERT INTO "user" VALUES + (DEFAULT,'Anonymous','',NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,now(),now()); + +CREATE TABLE user_groups ( + ug_user INTEGER NULL REFERENCES "user"(user_id) ON DELETE CASCADE, + ug_group TEXT NOT NULL +); +CREATE UNIQUE INDEX user_groups_unique ON user_groups (ug_user, ug_group); + +CREATE TABLE user_newtalk ( + user_id INTEGER NOT NULL REFERENCES "user"(user_id) ON DELETE CASCADE, + user_ip CIDR NULL +); +CREATE INDEX user_newtalk_id_idx ON user_newtalk (user_id); +CREATE INDEX user_newtalk_ip_idx ON user_newtalk (user_ip); + + +CREATE SEQUENCE 
page_page_id_seq; +CREATE TABLE page ( + page_id INTEGER NOT NULL PRIMARY KEY DEFAULT nextval('page_page_id_seq'), + page_namespace SMALLINT NOT NULL, + page_title TEXT NOT NULL, + page_restrictions TEXT, + page_counter BIGINT NOT NULL DEFAULT 0, + page_is_redirect CHAR NOT NULL DEFAULT 0, + page_is_new CHAR NOT NULL DEFAULT 0, + page_random NUMERIC(15,14) NOT NULL DEFAULT RANDOM(), + page_touched TIMESTAMPTZ, + page_latest INTEGER NOT NULL, -- FK? + page_len INTEGER NOT NULL +); +CREATE UNIQUE INDEX page_unique_name ON page (page_namespace, page_title); +CREATE INDEX page_main_title ON page (page_title) WHERE page_namespace = 0; +CREATE INDEX page_talk_title ON page (page_title) WHERE page_namespace = 1; +CREATE INDEX page_user_title ON page (page_title) WHERE page_namespace = 2; +CREATE INDEX page_utalk_title ON page (page_title) WHERE page_namespace = 3; +CREATE INDEX page_project_title ON page (page_title) WHERE page_namespace = 4; +CREATE INDEX page_random_idx ON page (page_random); +CREATE INDEX page_len_idx ON page (page_len); + +-- Create a dummy page to satisfy fk constraints where a page_id of "0" is added +INSERT INTO page (page_id,page_namespace,page_title,page_random,page_latest,page_len) +VALUES (0,0,'',0.0,0,0); + + +CREATE SEQUENCE rev_rev_id_val; +CREATE TABLE revision ( + rev_id INTEGER NOT NULL UNIQUE DEFAULT nextval('rev_rev_id_val'), + rev_page INTEGER NULL REFERENCES page (page_id) ON DELETE SET NULL, + rev_text_id INTEGER NULL, -- FK + rev_comment TEXT, + rev_user INTEGER NOT NULL REFERENCES "user"(user_id), + rev_user_text TEXT NOT NULL, + rev_timestamp TIMESTAMPTZ NOT NULL, + rev_minor_edit CHAR NOT NULL DEFAULT '0', + rev_deleted CHAR NOT NULL DEFAULT '0' +); +CREATE UNIQUE INDEX revision_unique ON revision (rev_page, rev_id); +CREATE INDEX rev_timestamp_idx ON revision (rev_timestamp); +CREATE INDEX rev_user_idx ON revision (rev_user); +CREATE INDEX rev_user_text_idx ON revision (rev_user_text); + + +CREATE SEQUENCE text_old_id_val; 
-- Revision text storage. The sequence text_old_id_val is created earlier in this script.
CREATE TABLE "text" (
  old_id     INTEGER  NOT NULL  PRIMARY KEY DEFAULT nextval('text_old_id_val'),
  old_text   TEXT,
  old_flags  TEXT
);


-- Deleted revisions.
CREATE TABLE archive (
  ar_namespace   SMALLINT     NOT NULL,
  ar_title       TEXT         NOT NULL,
  ar_text        TEXT,
  ar_comment     TEXT,
  ar_user        INTEGER          NULL  REFERENCES "user"(user_id) ON DELETE SET NULL,
  ar_user_text   TEXT         NOT NULL,
  ar_timestamp   TIMESTAMPTZ  NOT NULL,
  ar_minor_edit  CHAR         NOT NULL  DEFAULT '0',
  ar_flags       TEXT,
  ar_rev_id      INTEGER,
  ar_text_id     INTEGER
);
CREATE INDEX archive_name_title_timestamp ON archive (ar_namespace,ar_title,ar_timestamp);


-- Link tracking tables; every *_from column cascades with its page row.
CREATE TABLE pagelinks (
  pl_from       INTEGER   NOT NULL  REFERENCES page(page_id) ON DELETE CASCADE,
  pl_namespace  SMALLINT  NOT NULL,
  pl_title      TEXT      NOT NULL
);
CREATE UNIQUE INDEX pagelink_unique ON pagelinks (pl_namespace,pl_title,pl_from);

CREATE TABLE templatelinks (
  tl_from       INTEGER   NOT NULL  REFERENCES page(page_id) ON DELETE CASCADE,
  -- Namespace numbers are SMALLINT everywhere else in this schema (was TEXT).
  tl_namespace  SMALLINT  NOT NULL,
  tl_title      TEXT      NOT NULL
);
CREATE UNIQUE INDEX templatelinks_unique ON templatelinks (tl_namespace,tl_title,tl_from);

CREATE TABLE imagelinks (
  il_from  INTEGER  NOT NULL  REFERENCES page(page_id) ON DELETE CASCADE,
  il_to    TEXT     NOT NULL
);
CREATE UNIQUE INDEX il_from ON imagelinks (il_to,il_from);

CREATE TABLE categorylinks (
  cl_from       INTEGER      NOT NULL  REFERENCES page(page_id) ON DELETE CASCADE,
  cl_to         TEXT         NOT NULL,
  cl_sortkey    TEXT,
  cl_timestamp  TIMESTAMPTZ  NOT NULL
);
CREATE UNIQUE INDEX cl_from ON categorylinks (cl_from, cl_to);
CREATE INDEX cl_sortkey ON categorylinks (cl_to, cl_sortkey);

CREATE TABLE externallinks (
  el_from   INTEGER  NOT NULL  REFERENCES page(page_id) ON DELETE CASCADE,
  el_to     TEXT     NOT NULL,
  el_index  TEXT     NOT NULL
);
CREATE INDEX externallinks_from_to ON externallinks (el_from,el_to);
CREATE INDEX externallinks_index ON externallinks (el_index);

CREATE TABLE langlinks (
  ll_from   INTEGER  NOT NULL  REFERENCES page (page_id) ON DELETE CASCADE,
  ll_lang   TEXT,
  ll_title  TEXT
);
CREATE UNIQUE INDEX langlinks_unique ON langlinks (ll_from,ll_lang);
CREATE INDEX langlinks_lang_title ON langlinks (ll_lang,ll_title);


CREATE TABLE site_stats (
  ss_row_id         INTEGER  NOT NULL  UNIQUE,
  ss_total_views    INTEGER            DEFAULT 0,
  ss_total_edits    INTEGER            DEFAULT 0,
  ss_good_articles  INTEGER            DEFAULT 0,
  ss_total_pages    INTEGER            DEFAULT -1,
  ss_users          INTEGER            DEFAULT -1,
  ss_admins         INTEGER            DEFAULT -1,
  ss_images         INTEGER            DEFAULT 0
);

CREATE TABLE hitcounter (
  hc_id  BIGINT  NOT NULL
);


CREATE SEQUENCE ipblocks_ipb_id_val;
CREATE TABLE ipblocks (
  ipb_id           INTEGER      NOT NULL  PRIMARY KEY DEFAULT nextval('ipblocks_ipb_id_val'),
  ipb_address      CIDR             NULL,
  ipb_user         INTEGER          NULL  REFERENCES "user"(user_id) ON DELETE SET NULL,
  ipb_by           INTEGER      NOT NULL  REFERENCES "user"(user_id) ON DELETE CASCADE,
  ipb_reason       TEXT         NOT NULL,
  ipb_timestamp    TIMESTAMPTZ  NOT NULL,
  ipb_auto         CHAR         NOT NULL  DEFAULT '0',
  ipb_expiry       TIMESTAMPTZ  NOT NULL,
  ipb_range_start  TEXT,
  ipb_range_end    TEXT
);
CREATE INDEX ipb_address ON ipblocks (ipb_address);
CREATE INDEX ipb_user    ON ipblocks (ipb_user);
CREATE INDEX ipb_range   ON ipblocks (ipb_range_start,ipb_range_end);


CREATE TABLE image (
  img_name         TEXT      NOT NULL  PRIMARY KEY,
  -- File size in bytes: SMALLINT tops out at 32767, so it must be INTEGER.
  img_size         INTEGER   NOT NULL,
  img_width        SMALLINT  NOT NULL,
  img_height       SMALLINT  NOT NULL,
  img_metadata     TEXT,
  img_bits         SMALLINT,
  img_media_type   TEXT,
  img_major_mime   TEXT                DEFAULT 'unknown',
  img_minor_mime   TEXT                DEFAULT 'unknown',
  img_description  TEXT      NOT NULL,
  img_user         INTEGER       NULL  REFERENCES "user"(user_id) ON DELETE SET NULL,
  img_user_text    TEXT      NOT NULL,
  img_timestamp    TIMESTAMPTZ
);
CREATE INDEX img_size_idx ON image (img_size);
CREATE INDEX img_timestamp_idx ON image (img_timestamp);

CREATE TABLE oldimage (
  oi_name          TEXT         NOT NULL  REFERENCES image(img_name),
  oi_archive_name  TEXT         NOT NULL,
  -- File size in bytes (was SMALLINT, see image.img_size).
  oi_size          INTEGER      NOT NULL,
  oi_width         SMALLINT     NOT NULL,
  oi_height        SMALLINT     NOT NULL,
  oi_bits          SMALLINT     NOT NULL,
  oi_description   TEXT,
  oi_user          INTEGER          NULL  REFERENCES "user"(user_id) ON DELETE SET NULL,
  oi_user_text     TEXT         NOT NULL,
  oi_timestamp     TIMESTAMPTZ  NOT NULL
);
CREATE INDEX oi_name ON oldimage (oi_name);


CREATE TABLE filearchive (
  fa_id                 SERIAL       NOT NULL  PRIMARY KEY,
  fa_name               TEXT         NOT NULL,
  fa_archive_name       TEXT,
  fa_storage_group      VARCHAR(16),
  fa_storage_key        CHAR(64),
  fa_deleted_user       INTEGER          NULL  REFERENCES "user"(user_id) ON DELETE SET NULL,
  fa_deleted_timestamp  TIMESTAMPTZ  NOT NULL,
  fa_deleted_reason     TEXT,
  -- File size in bytes (was SMALLINT, see image.img_size).
  fa_size               INTEGER      NOT NULL,
  fa_width              SMALLINT     NOT NULL,
  fa_height             SMALLINT     NOT NULL,
  fa_metadata           TEXT,
  fa_bits               SMALLINT,
  fa_media_type         TEXT,
  fa_major_mime         TEXT                   DEFAULT 'unknown',
  fa_minor_mime         TEXT                   DEFAULT 'unknown',
  fa_description        TEXT         NOT NULL,
  fa_user               INTEGER          NULL  REFERENCES "user"(user_id) ON DELETE SET NULL,
  fa_user_text          TEXT         NOT NULL,
  fa_timestamp          TIMESTAMPTZ
);
CREATE INDEX fa_name_time ON filearchive (fa_name, fa_timestamp);
CREATE INDEX fa_dupe      ON filearchive (fa_storage_group, fa_storage_key);
CREATE INDEX fa_notime    ON filearchive (fa_deleted_timestamp);
CREATE INDEX fa_nouser    ON filearchive (fa_deleted_user);


CREATE SEQUENCE rc_rc_id_seq;
CREATE TABLE recentchanges (
  rc_id              INTEGER      NOT NULL  PRIMARY KEY DEFAULT nextval('rc_rc_id_seq'),
  rc_timestamp       TIMESTAMPTZ  NOT NULL,
  rc_cur_time        TIMESTAMPTZ  NOT NULL,
  rc_user            INTEGER          NULL  REFERENCES "user"(user_id) ON DELETE SET NULL,
  rc_user_text       TEXT         NOT NULL,
  rc_namespace       SMALLINT     NOT NULL,
  rc_title           TEXT         NOT NULL,
  rc_comment         TEXT,
  rc_minor           CHAR         NOT NULL  DEFAULT '0',
  rc_bot             CHAR         NOT NULL  DEFAULT '0',
  rc_new             CHAR         NOT NULL  DEFAULT '0',
  rc_cur_id          INTEGER      NOT NULL  REFERENCES page(page_id),
  rc_this_oldid      INTEGER      NOT NULL,
  rc_last_oldid      INTEGER      NOT NULL,
  rc_type            CHAR         NOT NULL  DEFAULT '0',
  rc_moved_to_ns     SMALLINT,
  rc_moved_to_title  TEXT,
  rc_patrolled       CHAR         NOT NULL  DEFAULT '0',
  rc_ip              CIDR
);
CREATE INDEX rc_timestamp       ON recentchanges (rc_timestamp);
CREATE INDEX rc_namespace_title ON recentchanges (rc_namespace, rc_title);
CREATE INDEX rc_cur_id          ON recentchanges (rc_cur_id);
CREATE INDEX new_name_timestamp ON recentchanges (rc_new, rc_namespace, rc_timestamp);
CREATE INDEX rc_ip              ON recentchanges (rc_ip);


CREATE TABLE watchlist (
  wl_user                   INTEGER   NOT NULL  REFERENCES "user"(user_id) ON DELETE CASCADE,
  wl_namespace              SMALLINT  NOT NULL  DEFAULT 0,
  wl_title                  TEXT      NOT NULL,
  wl_notificationtimestamp  TIMESTAMPTZ
);
CREATE UNIQUE INDEX wl_user_namespace_title ON watchlist (wl_namespace, wl_title, wl_user);


CREATE TABLE math (
  math_inputhash              TEXT      NOT NULL  UNIQUE,
  math_outputhash             TEXT      NOT NULL,
  math_html_conservativeness  SMALLINT  NOT NULL,
  math_html                   TEXT,
  math_mathml                 TEXT
);


CREATE TABLE interwiki (
  iw_prefix  TEXT  NOT NULL  UNIQUE,
  iw_url     TEXT  NOT NULL,
  iw_local   CHAR  NOT NULL,
  iw_trans   CHAR  NOT NULL  DEFAULT '0'
);


CREATE TABLE querycache (
  qc_type       TEXT      NOT NULL,
  -- Holds query result counts, which can exceed the SMALLINT range.
  qc_value      INTEGER   NOT NULL,
  qc_namespace  SMALLINT  NOT NULL,
  qc_title      TEXT      NOT NULL
);
CREATE INDEX querycache_type_value ON querycache (qc_type, qc_value);

CREATE TABLE querycache_info (
  qci_type       TEXT              UNIQUE,
  qci_timestamp  TIMESTAMPTZ NULL
);

CREATE TABLE objectcache (
  keyname  CHAR(255)              UNIQUE,
  value    BYTEA        NOT NULL  DEFAULT '',
  exptime  TIMESTAMPTZ  NOT NULL
);
-- Index name corrected from the misspelled "objectcacache_exptime".
CREATE INDEX objectcache_exptime ON objectcache (exptime);

CREATE TABLE transcache (
  tc_url       TEXT         NOT NULL  UNIQUE,
  tc_contents  TEXT         NOT NULL,
  tc_time      TIMESTAMPTZ  NOT NULL
);


CREATE TABLE logging (
  log_type       TEXT         NOT NULL,
  log_action     TEXT         NOT NULL,
  log_timestamp  TIMESTAMPTZ  NOT NULL,
  log_user       INTEGER                REFERENCES "user"(user_id) ON DELETE SET NULL,
  log_namespace  SMALLINT     NOT NULL,
  log_title      TEXT         NOT NULL,
  log_comment    TEXT,
  log_params     TEXT
);
CREATE INDEX logging_type_name ON logging (log_type, log_timestamp);
CREATE INDEX logging_user_time ON logging (log_timestamp, log_user);
CREATE INDEX logging_page_time ON logging (log_namespace, log_title, log_timestamp);


CREATE TABLE trackbacks (
  tb_id     SERIAL   NOT NULL  PRIMARY KEY,
  tb_page   INTEGER            REFERENCES page(page_id) ON DELETE CASCADE,
  tb_title  TEXT     NOT NULL,
  tb_url    TEXT     NOT NULL,
  tb_ex     TEXT,
  tb_name   TEXT
);
CREATE INDEX trackback_page ON trackbacks (tb_page);


CREATE SEQUENCE job_job_id_seq;
CREATE TABLE job (
  job_id         INTEGER   NOT NULL  PRIMARY KEY DEFAULT nextval('job_job_id_seq'),
  job_cmd        TEXT      NOT NULL,
  job_namespace  SMALLINT  NOT NULL,
  job_title      TEXT      NOT NULL,
  job_params     TEXT      NOT NULL
);
CREATE INDEX job_cmd_namespace_title ON job (job_cmd, job_namespace, job_title);

-- Tsearch2 2 stuff. Will fail if we don't have proper access to the tsearch2 tables

-- Keep page.titlevector in sync with page_title on insert and on title change.
ALTER TABLE page ADD titlevector tsvector;
CREATE INDEX ts2_page_title ON page USING gist(titlevector);
CREATE FUNCTION ts2_page_title() RETURNS TRIGGER LANGUAGE plpgsql AS
$mw$
BEGIN
IF TG_OP = 'INSERT' THEN
  NEW.titlevector = to_tsvector(NEW.page_title);
ELSIF NEW.page_title != OLD.page_title THEN
  NEW.titlevector := to_tsvector(NEW.page_title);
END IF;
RETURN NEW;
END;
$mw$;

CREATE TRIGGER ts2_page_title BEFORE INSERT OR UPDATE ON page
FOR EACH ROW EXECUTE PROCEDURE ts2_page_title();


-- Keep text.textvector in sync with old_text on insert and on text change.
ALTER TABLE text ADD textvector tsvector;
CREATE INDEX ts2_page_text ON text USING gist(textvector);
CREATE FUNCTION ts2_page_text() RETURNS TRIGGER LANGUAGE plpgsql AS
$mw$
BEGIN
IF TG_OP = 'INSERT' THEN
  NEW.textvector = to_tsvector(NEW.old_text);
ELSIF NEW.old_text != OLD.old_text THEN
  NEW.textvector := to_tsvector(NEW.old_text);
END IF;
RETURN NEW;
END;
$mw$;

CREATE TRIGGER ts2_page_text BEFORE INSERT OR UPDATE ON text
FOR EACH ROW EXECUTE PROCEDURE ts2_page_text();

-- Helper that inserts one interwiki row and returns 1.
-- NOTE(review): the second argument is declared INT but feeds iw_url (TEXT); confirm against callers.
CREATE OR REPLACE FUNCTION add_interwiki (TEXT,INT,CHAR) RETURNS INT LANGUAGE SQL AS
$mw$
  INSERT INTO interwiki (iw_prefix, iw_url, iw_local) VALUES ($1,$2,$3);
  SELECT 1;
$mw$;
COMMIT;
diff --git
a/maintenance/purgeOldText.inc b/maintenance/purgeOldText.inc new file mode 100644 index 00000000..0bf6225a --- /dev/null +++ b/maintenance/purgeOldText.inc @@ -0,0 +1,63 @@
<?php

/**
 * Support functions for cleaning up redundant text records
 *
 * @package MediaWiki
 * @subpackage Maintenance
 * @author Rob Church <robchur@gmail.com>
 */

/**
 * Find (and optionally delete) rows of the text table that are referenced
 * by neither the revision table nor the archive table.
 *
 * The whole operation runs on the master connection inside one transaction.
 * Progress is echoed to standard output.
 *
 * @param $delete bool When true, the unreferenced text rows are deleted;
 *                     otherwise they are only counted and reported.
 */
function PurgeRedundantText( $delete = false ) {

	# Data should come off the master, wrapped in a transaction
	$dbw =& wfGetDB( DB_MASTER );
	$dbw->begin();

	$tbl_arc = $dbw->tableName( 'archive' );
	$tbl_rev = $dbw->tableName( 'revision' );
	$tbl_txt = $dbw->tableName( 'text' );

	# Both lists must exist even when a query returns no rows, otherwise
	# implode() and count() below operate on undefined variables
	$cur = array();
	$old = array();

	# Get "active" text records from the revisions table
	# (DISTINCT rather than the MySQL-only synonym DISTINCTROW)
	echo( "Searching for active text records in revisions table..." );
	$res = $dbw->query( "SELECT DISTINCT rev_text_id FROM $tbl_rev" );
	while( $row = $dbw->fetchObject( $res ) ) {
		if( !is_null( $row->rev_text_id ) ) {
			$cur[] = intval( $row->rev_text_id );
		}
	}
	echo( "done.\n" );

	# Get "active" text records from the archive table
	# ar_text_id may be NULL; a NULL would leave a hole in the IN list
	echo( "Searching for active text records in archive table..." );
	$res = $dbw->query( "SELECT DISTINCT ar_text_id FROM $tbl_arc" );
	while( $row = $dbw->fetchObject( $res ) ) {
		if( !is_null( $row->ar_text_id ) ) {
			$cur[] = intval( $row->ar_text_id );
		}
	}
	echo( "done.\n" );

	# Get the IDs of all text records not in these sets
	# With no active records at all, "NOT IN ( )" would be a syntax error,
	# so the condition is only added when there is something to exclude
	echo( "Searching for inactive text records..." );
	$sql = "SELECT old_id FROM $tbl_txt";
	if( count( $cur ) ) {
		$set = implode( ', ', $cur );
		$sql .= " WHERE old_id NOT IN ( $set )";
	}
	$res = $dbw->query( $sql );
	while( $row = $dbw->fetchObject( $res ) ) {
		$old[] = intval( $row->old_id );
	}
	echo( "done.\n" );

	# Inform the user of what we're going to do
	$count = count( $old );
	echo( "$count inactive items found.\n" );

	# Delete as appropriate
	if( $delete && $count ) {
		echo( "Deleting..." );
		$set = implode( ', ', $old );
		$dbw->query( "DELETE FROM $tbl_txt WHERE old_id IN ( $set )" );
		echo( "done.\n" );
	}

	# Done
	$dbw->commit();

}

?>
\ No newline at end of file diff --git a/maintenance/purgeOldText.php b/maintenance/purgeOldText.php new file mode 100644 index 00000000..e8a738ad --- /dev/null +++ b/maintenance/purgeOldText.php @@ -0,0 +1,30 @@
<?php

/**
 * Purge old text records from the database
 *
 * @package MediaWiki
 * @subpackage Maintenance
 * @author Rob Church <robchur@gmail.com>
 */

$options = array( 'purge', 'help' );
require_once( 'commandLine.inc' );
require_once( 'purgeOldText.inc' );

/**
 * Print the command-line help for this script.
 */
function ShowUsage() {
	echo( "Prunes unused text records from the database.\n\n" );
	echo( "Usage: php purgeOldText.php [--purge]\n\n" );
	echo( "purge : Performs the deletion\n" );
	echo( " help : Show this usage information\n" );
}

echo( "Purge Old Text\n\n" );

# --help wins over everything else; otherwise report, and delete only with --purge
$wantHelp = !empty( $options['help'] );
$doPurge = !empty( $options['purge'] );

if( $wantHelp ) {
	ShowUsage();
} else {
	PurgeRedundantText( $doPurge );
}

?>
\ No newline at end of file diff --git a/maintenance/reassignEdits.inc.php b/maintenance/reassignEdits.inc.php new file mode 100644 index 00000000..6e54aea1 --- /dev/null +++ b/maintenance/reassignEdits.inc.php @@ -0,0 +1,144 @@
<?php

/**
 * Support functions for the reassignEdits script
 *
 * @package MediaWiki
 * @subpackage Maintenance
 * @author Rob Church <robchur@gmail.com>
 * @licence GNU General Public Licence 2.0 or later
 */

/**
 * Reassign edits from one user to another
 *
 * Counts the matching rows in revision, archive and (optionally)
 * recentchanges, then rewrites their user id / user text columns unless
 * running in report mode. Everything happens on the master connection
 * between immediateBegin() and immediateCommit().
 *
 * @param $from User to take edits from
 * @param $to User to assign edits to
 * @param $rc Update the recent changes table
 * @param $report Don't change things; just echo numbers
 * @return integer Number of entries changed, or that would be changed
 */
function reassignEdits( &$from, &$to, $rc = false, $report = false ) {
	$dbw =& wfGetDB( DB_MASTER );
	$dbw->immediateBegin();
	$fname = 'reassignEdits';

	# Count things
	out( "Checking current edits..." );
	$res = $dbw->select( 'revision', 'COUNT(*) AS count', userConditions( $from, 'rev_user', 'rev_user_text' ), $fname );
	$row = $dbw->fetchObject( $res );
	$cur = $row->count;
	out( "found {$cur}.\n" );

	out( "Checking deleted edits..." );
	$res = $dbw->select( 'archive', 'COUNT(*) AS count', userConditions( $from, 'ar_user', 'ar_user_text' ), $fname );
	$row = $dbw->fetchObject( $res );
	$del = $row->count;
	out( "found {$del}.\n" );

	# Don't count recent changes if we're not supposed to
	if( $rc ) {
		out( "Checking recent changes..." );
		$res = $dbw->select( 'recentchanges', 'COUNT(*) AS count', userConditions( $from, 'rc_user', 'rc_user_text' ), $fname );
		$row = $dbw->fetchObject( $res );
		$rec = $row->count;
		out( "found {$rec}.\n" );
	} else {
		$rec = 0;
	}

	$total = $cur + $del + $rec;
	out( "\nTotal entries to change: {$total}\n" );

	if( !$report ) {
		if( $total ) {
			# Reassign edits
			out( "\nReassigning current edits..." );
			$res = $dbw->update( 'revision', userSpecification( $to, 'rev_user', 'rev_user_text' ), userConditions( $from, 'rev_user', 'rev_user_text' ), $fname );
			out( "done.\nReassigning deleted edits..." );
			$res = $dbw->update( 'archive', userSpecification( $to, 'ar_user', 'ar_user_text' ), userConditions( $from, 'ar_user', 'ar_user_text' ), $fname );
			out( "done.\n" );
			# Update recent changes if required
			if( $rc ) {
				out( "Updating recent changes..." );
				$res = $dbw->update( 'recentchanges', userSpecification( $to, 'rc_user', 'rc_user_text' ), userConditions( $from, 'rc_user', 'rc_user_text' ), $fname );
				out( "done.\n" );
			}
		}
	}

	$dbw->immediateCommit();
	return (int)$total;
}

/**
 * Return the most efficient set of user conditions
 * i.e. a user => id mapping, or a user_text => text mapping
 *
 * A non-zero id selects on the id column; an id of zero falls back to
 * matching on the user text column.
 *
 * @param $user User for the condition
 * @param $idfield Field name containing the identifier
 * @param $utfield Field name containing the user text
 * @return array
 */
function userConditions( &$user, $idfield, $utfield ) {
	return $user->getId() ? array( $idfield => $user->getId() ) : array( $utfield => $user->getName() );
}

/**
 * Return user specifications
 * i.e. user => id, user_text => text
 *
 * @param $user User for the spec
 * @param $idfield Field name containing the identifier
 * @param $utfield Field name containing the user text
 * @return array
 */
function userSpecification( &$user, $idfield, $utfield ) {
	return array( $idfield => $user->getId(), $utfield => $user->getName() );
}

/**
 * Echo output if $wgSilent is off
 *
 * @param $output Output to echo
 * @return bool True if the output was echoed
 */
function out( $output ) {
	global $wgSilent;
	if( !$wgSilent ) {
		echo( $output );
		return true;
	} else {
		return false;
	}
}

/**
 * Mutator for $wgSilent
 *
 * @param $silent Switch on $wgSilent
 */
function silent( $silent = true ) {
	global $wgSilent;
	$wgSilent = $silent;
}

/**
 * Initialise the user object
 *
 * IP addresses become an anonymous user (id 0) carrying that name. Any
 * other name goes through User::newFromName(), which yields null for a
 * name that is not a valid title; that case now stops with a message
 * instead of a fatal "call to a member function on a non-object".
 *
 * @param $username Username or IP address
 * @return User
 */
function initialiseUser( $username ) {
	if( User::isIP( $username ) ) {
		$user = new User();
		$user->setId( 0 );
		$user->setName( $username );
	} else {
		$user = User::newFromName( $username );
		if( !is_object( $user ) ) {
			wfDie( "Invalid username '{$username}'.\n" );
		}
	}
	$user->loadFromDatabase();
	return $user;
}

?>
\ No newline at end of file diff --git a/maintenance/reassignEdits.php b/maintenance/reassignEdits.php new file mode 100644 index 00000000..4ac566af --- /dev/null +++ b/maintenance/reassignEdits.php @@ -0,0 +1,57 @@
<?php

/**
 * Reassign edits from a user or IP address to another user
 *
 * @package MediaWiki
 * @subpackage Maintenance
 * @author Rob Church <robchur@gmail.com>
 * @licence GNU General Public Licence 2.0 or later
 */

$options = array( 'force', 'norc', 'quiet', 'report' );
require_once( 'commandLine.inc' );
require_once( 'reassignEdits.inc.php' );

# Set silent mode; --report overrides --quiet
# (the helper in reassignEdits.inc.php is silent(); setSilent() is not defined anywhere)
if( !@$options['report'] && @$options['quiet'] )
	silent();

out( "Reassign Edits\n\n" );

if( @$args[0] && @$args[1] ) {

	# Set up the users involved
	# initialiseUser() returns by value, so plain assignment is used
	$from = initialiseUser( $args[0] );
	$to   = initialiseUser( $args[1] );

	# If the target doesn't exist, and --force is not set, stop here
	if( $to->getId() || @$options['force'] ) {
		# Reassign the edits
		$report = @$options['report'];
		$count = reassignEdits( $from, $to, !@$options['norc'], $report );
		# If reporting, and there were items, advise the user to run without --report
		if( $report && $count )
			out( "Run the script again without --report to update.\n" );
	} else {
		$ton = $to->getName();
		echo( "User '{$ton}' not found.\n" );
	}

} else {
	ShowUsage();
}

/** Show script usage information */
function ShowUsage() {
	echo( "Reassign edits from one user to another.\n\n" );
	echo( "Usage: php reassignEdits.php [--force|--quiet|--norc|--report] <from> <to>\n\n" );
	echo( " <from> : Name of the user to assign edits from\n" );
	echo( " <to> : Name of the user to assign edits to\n" );
	echo( " --force : Reassign even if the target user doesn't exist\n" );
	echo( " --quiet : Don't print status information (except for errors)\n" );
	echo( " --norc : Don't update the recent changes table\n" );
	echo( " --report : Print out details of what would be changed, but don't update it\n\n" );
}

?>
\ No newline at end of file diff --git a/maintenance/rebuildImages.php b/maintenance/rebuildImages.php new file mode 100644 index 00000000..45477097 --- /dev/null +++ b/maintenance/rebuildImages.php @@ -0,0 +1,275 @@
<?php
/*
 * Script to update image metadata records
 *
 * Usage: php rebuildImages.php [--missing] [--dry-run]
 * Options:
 *   --missing  Crawl the uploads dir for images without records, and
 *              add them only.
 *
 * Copyright (C) 2005 Brion Vibber <brion@pobox.com>
 * http://www.mediawiki.org/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @author Brion Vibber <brion at pobox.com>
 * @package MediaWiki
 * @subpackage maintenance
 */

$options = array( 'missing', 'dry-run' );

require_once( 'commandLine.inc' );
require_once( 'FiveUpgrade.inc' );

/**
 * Fills in missing metadata columns of the image and oldimage tables, or
 * (with --missing) registers files found in the upload directory that have
 * no image row.
 */
class ImageBuilder extends FiveUpgrade {
	/**
	 * @param $dryrun bool When true no database writes are issued
	 */
	function ImageBuilder( $dryrun = false ) {
		parent::FiveUpgrade();

		$this->maxLag = 10; # if slaves are lagged more than 10 secs, wait
		$this->dryrun = $dryrun;
	}

	/** Rebuild metadata for current images, then for archived ones. */
	function build() {
		$this->buildImage();
		$this->buildOldImage();
	}

	/**
	 * Reset the progress counters before processing a table.
	 *
	 * @param $count Number of rows expected
	 * @param $table Table name, used in progress lines
	 */
	function init( $count, $table ) {
		$this->processed = 0;
		$this->updated = 0;
		$this->count = $count;
		$this->startTime = wfTime();
		$this->table = $table;
	}

	/**
	 * Record one processed row and print a status line every 100 rows.
	 *
	 * @param $updated Number of rows updated by this step (0 or 1)
	 */
	function progress( $updated ) {
		$this->updated += $updated;
		$this->processed++;
		if( $this->processed % 100 != 0 ) {
			return;
		}
		$portion = $this->processed / $this->count;
		$updateRate = $this->updated / $this->processed;

		$now = wfTime();
		$delta = $now - $this->startTime;
		$estimatedTotalTime = $delta / $portion;
		$eta = $this->startTime + $estimatedTotalTime;
		# Rows per second so far; guard against a zero elapsed time
		$rate = ( $delta > 0 ) ? $this->processed / $delta : 0;

		# The original passed the undefined variables $completed and $rate here
		printf( "%s: %6.2f%% done on %s; ETA %s [%d/%d] %.2f/sec <%.2f%% updated>\n",
			wfTimestamp( TS_DB, intval( $now ) ),
			$portion * 100.0,
			$this->table,
			wfTimestamp( TS_DB, intval( $eta ) ),
			$this->processed,
			$this->count,
			$rate,
			$updateRate * 100.0 );
		flush();
	}

	/**
	 * Walk every row of a table and apply the update the callback returns.
	 *
	 * @param $table Table name
	 * @param $key Column used to address the row in the UPDATE
	 * @param $callback Callable taking a row; returns an array of column
	 *                  values to set, or a non-array to leave the row alone
	 */
	function buildTable( $table, $key, $callback ) {
		$fname = 'ImageBuilder::buildTable';

		$count = $this->dbw->selectField( $table, 'count(*)', '', $fname );
		$this->init( $count, $table );
		$this->log( "Processing $table..." );

		$tableName = $this->dbr->tableName( $table );
		$sql = "SELECT * FROM $tableName";
		$result = $this->dbr->query( $sql, $fname );

		while( $row = $this->dbr->fetchObject( $result ) ) {
			$update = call_user_func( $callback, $row );
			if( is_array( $update ) ) {
				if( !$this->dryrun ) {
					$this->dbw->update( $table,
						$update,
						array( $key => $row->$key ),
						$fname );
				}
				$this->progress( 1 );
			} else {
				$this->progress( 0 );
			}
		}
		$this->log( "Finished $table... $this->updated of $this->processed rows updated" );
		$this->dbr->freeResult( $result );
	}

	/** Process the image table, keyed on img_name. */
	function buildImage() {
		$callback = array( &$this, 'imageCallback' );
		$this->buildTable( 'image', 'img_name', $callback );
	}

	/**
	 * Per-row callback for the image table.
	 *
	 * @param $row Row object from the image table
	 * @return array of new column values, or null when img_width is already set
	 */
	function imageCallback( $row ) {
		if( $row->img_width ) {
			// Already processed
			return null;
		}

		// Fill in the new image info fields
		$info = $this->imageInfo( $row->img_name );

		// Drop the cached entry for this image so the new values are picked up
		global $wgMemc, $wgDBname;
		$key = $wgDBname . ":Image:" . md5( $row->img_name );
		$wgMemc->delete( $key );

		return array(
			'img_width'      => $info['width'],
			'img_height'     => $info['height'],
			'img_bits'       => $info['bits'],
			'img_media_type' => $info['media'],
			'img_major_mime' => $info['major'],
			'img_minor_mime' => $info['minor'] );
	}


	/** Process the oldimage table, keyed on oi_archive_name. */
	function buildOldImage() {
		$this->buildTable( 'oldimage', 'oi_archive_name',
			array( &$this, 'oldimageCallback' ) );
	}

	/**
	 * Per-row callback for the oldimage table.
	 *
	 * @param $row Row object from the oldimage table
	 * @return array of new column values, or null when oi_width is already set
	 */
	function oldimageCallback( $row ) {
		if( $row->oi_width ) {
			return null;
		}

		// Fill in the new image info fields
		$info = $this->imageInfo( $row->oi_archive_name, 'wfImageArchiveDir', $row->oi_name );
		return array(
			'oi_width'  => $info['width' ],
			'oi_height' => $info['height'],
			'oi_bits'   => $info['bits'  ] );
	}

	/**
	 * Crawl the upload directory (all 256 x/xy hash subdirectories when
	 * hashed uploads are enabled) for files without an image row.
	 */
	function crawlMissing() {
		global $wgUploadDirectory, $wgHashedUploadDirectory;
		if( $wgHashedUploadDirectory ) {
			for( $i = 0; $i < 16; $i++ ) {
				for( $j = 0; $j < 16; $j++ ) {
					$dir = sprintf( '%s%s%01x%s%02x',
						$wgUploadDirectory,
						DIRECTORY_SEPARATOR,
						$i,
						DIRECTORY_SEPARATOR,
						$i * 16 + $j );
					$this->crawlDirectory( $dir );
				}
			}
		} else {
			$this->crawlDirectory( $wgUploadDirectory );
		}
	}

	/**
	 * Check every plain file directly inside one directory; subdirectories
	 * and symlinks are skipped.
	 *
	 * @param $dir Directory path
	 */
	function crawlDirectory( $dir ) {
		if( !file_exists( $dir ) ) {
			return $this->log( "no directory, skipping $dir" );
		}
		if( !is_dir( $dir ) ) {
			return $this->log( "not a directory?! skipping $dir" );
		}
		if( !is_readable( $dir ) ) {
			return $this->log( "dir not readable, skipping $dir" );
		}
		$source = opendir( $dir );
		if( $source === false ) {
			return $this->log( "couldn't open dir, skipping $dir" );
		}

		$this->log( "crawling $dir" );
		while( false !== ( $filename = readdir( $source ) ) ) {
			$fullpath = $dir . DIRECTORY_SEPARATOR . $filename;
			if( is_dir( $fullpath ) ) {
				continue;
			}
			if( is_link( $fullpath ) ) {
				$this->log( "skipping symlink at $fullpath" );
				continue;
			}
			$this->checkMissingImage( $filename, $fullpath );
		}
		closedir( $source );
	}

	/**
	 * Add an image row for the file unless one with that name exists.
	 *
	 * @param $filename File name within the directory
	 * @param $fullpath Full path of the file
	 */
	function checkMissingImage( $filename, $fullpath ) {
		$fname = 'ImageBuilder::checkMissingImage';
		$row = $this->dbw->selectRow( 'image',
			array( 'img_name' ),
			array( 'img_name' => $filename ),
			$fname );

		if( $row ) {
			// already known, move on
			return;
		} else {
			$this->addMissingImage( $filename, $fullpath );
		}
	}

	/**
	 * Insert an image row for a file that has none.
	 *
	 * @param $filename File name within the directory
	 * @param $fullpath Full path of the file
	 */
	function addMissingImage( $filename, $fullpath ) {
		$fname = 'ImageBuilder::addMissingImage';

		$size = filesize( $fullpath );
		$info = $this->imageInfo( $filename );
		$timestamp = $this->dbw->timestamp( filemtime( $fullpath ) );

		// A name that changes under checkTitleEncoding() has to be renamed;
		// in dry-run mode the new name is only reported
		global $wgContLang;
		$altname = $wgContLang->checkTitleEncoding( $filename );
		if( $altname != $filename ) {
			if( $this->dryrun ) {
				$filename = $altname;
				$this->log( "Estimating transcoding... $altname" );
			} else {
				$filename = $this->renameFile( $filename );
			}
		}

		if( $filename == '' ) {
			$this->log( "Empty filename for $fullpath" );
			return;
		}

		$fields = array(
			'img_name'        => $filename,
			'img_size'        => $size,
			'img_width'       => $info['width'],
			'img_height'      => $info['height'],
			'img_metadata'    => '', // filled in on-demand
			'img_bits'        => $info['bits'],
			'img_media_type'  => $info['media'],
			'img_major_mime'  => $info['major'],
			'img_minor_mime'  => $info['minor'],
			'img_description' => '(recovered file, missing upload log entry)',
			'img_user'        => 0,
			'img_user_text'   => 'Conversion script',
			'img_timestamp'   => $timestamp );
		if( !$this->dryrun ) {
			$this->dbw->insert( 'image', $fields, $fname );
		}
		$this->log( $fullpath );
	}
}

$builder = new ImageBuilder( isset( $options['dry-run'] ) );
if( isset( $options['missing'] ) ) {
	$builder->crawlMissing();
} else {
	$builder->build();
}

?>
diff --git a/maintenance/rebuildInterwiki.inc b/maintenance/rebuildInterwiki.inc new file mode 100644 index 00000000..d719fd40 --- /dev/null +++ b/maintenance/rebuildInterwiki.inc @@ -0,0 +1,260 @@ +<?php +/** + * Rebuild interwiki table using the file on meta and the language list + * Wikimedia specific!
+ * + * @todo document + * @package MediaWiki + * @subpackage Maintenance + */

/** */

/**
 * One multi-language project: its database suffix, its lateral interwiki
 * prefix and its base hostname.
 *
 * @package MediaWiki
 * @subpackage Maintenance
 */
class Site {
	var $suffix, $lateral, $url;

	/**
	 * @param $s Database name suffix
	 * @param $l Lateral interwiki prefix
	 * @param $u Base hostname
	 */
	function Site( $s, $l, $u ) {
		$this->suffix = $s;
		$this->lateral = $l;
		$this->url = $u;
	}

	/**
	 * Article URL pattern for one language of this site; underscores in
	 * the language code become hyphens in the hostname.
	 *
	 * @param $lang Language code
	 * @return string
	 */
	function getURL( $lang ) {
		$xlang = str_replace( '_', '-', $lang );
		return "http://$xlang.{$this->url}/wiki/\$1";
	}
}

/**
 * Build the SQL that truncates and refills the interwiki table of every
 * database in the database list.
 *
 * Sets the globals $langlist, $languageAliases and $prefixRewrites, which
 * makeLanguageLinks() and makeLink() read.
 *
 * @return string SQL script
 */
function getRebuildInterwikiSQL() {
	global $langlist, $languageAliases, $prefixRewrites;

	# Multi-language sites
	# db suffix => db suffix, iw prefix, hostname
	$sites = array(
		'wiki' => new Site( 'wiki', 'w', 'wikipedia.org' ),
		'wiktionary' => new Site( 'wiktionary', 'wikt', 'wiktionary.org' ),
		'wikiquote' => new Site( 'wikiquote', 'q', 'wikiquote.org' ),
		'wikibooks' => new Site( 'wikibooks', 'b', 'wikibooks.org' ),
		'wikinews' => new Site( 'wikinews', 'n', 'wikinews.org' ),
		'wikisource' => new Site( 'wikisource', 's', 'wikisource.org' ),
		'wikimedia' => new Site( 'wikimedia', 'chapter', 'wikimedia.org' ),
	);

	# file() returns false when a list cannot be read; stop with a message
	# rather than generating SQL from an empty list
	$langlistLines = file( "/home/wikipedia/common/langlist" );
	$dblistLines = file( "/home/wikipedia/common/all.dblist" );
	if ( $langlistLines === false || $dblistLines === false ) {
		wfDie( "Unable to read the language list or the database list\n" );
	}

	# List of language prefixes likely to be found in multi-language sites
	$langlist = array_map( "trim", $langlistLines );

	# List of all database names
	$dblist = array_map( "trim", $dblistLines );

	# Special-case hostnames
	$specials = array(
		'sourceswiki' => 'sources.wikipedia.org',
		'quotewiki' => 'wikiquote.org',
		'textbookwiki' => 'wikibooks.org',
		'sep11wiki' => 'sep11.wikipedia.org',
		'metawiki' => 'meta.wikimedia.org',
		'commonswiki' => 'commons.wikimedia.org',
	);

	# Extra interwiki links that can't be in the intermap for some reason
	$extraLinks = array(
		array( 'm', 'http://meta.wikimedia.org/wiki/$1', 1 ),
		array( 'meta', 'http://meta.wikimedia.org/wiki/$1', 1 ),
		array( 'sep11', 'http://sep11.wikipedia.org/wiki/$1', 1 ),
	);

	# Language aliases, usually configured as redirects to the real wiki in apache
	# Interlanguage links are made directly to the real wiki
	# Something horrible happens if you forget to list an alias here, I can't
	# remember what
	$languageAliases = array(
		'zh-cn' => 'zh',
		'zh-tw' => 'zh',
		'dk' => 'da',
		'nb' => 'no',
	);

	# Special case prefix rewrites, for the benefit of Swedish which uses s:t
	# as an abbreviation for saint
	$prefixRewrites = array(
		'svwiki' => array( 's' => 'src' ),
	);

	# Construct a list of reserved prefixes
	$reserved = array();
	foreach ( $langlist as $lang ) {
		$reserved[$lang] = 1;
	}
	foreach ( $languageAliases as $alias => $lang ) {
		$reserved[$alias] = 1;
	}
	foreach( $sites as $site ) {
		$reserved[$site->lateral] = 1;
	}

	# Extract the intermap from meta
	$intermap = wfGetHTTP( 'http://meta.wikimedia.org/w/index.php?title=Interwiki_map&action=raw', 30 );
	$lines = array_map( 'trim', explode( "\n", trim( $intermap ) ) );

	if ( !$lines || count( $lines ) < 2 ) {
		wfDie( "m:Interwiki_map not found" );
	}

	$iwArray = array();

	foreach ( $lines as $line ) {
		if ( preg_match( '/^\|\s*(.*?)\s*\|\|\s*(https?:\/\/.*?)\s*$/', $line, $matches ) ) {
			$prefix = strtolower( $matches[1] );
			$url = $matches[2];
			if ( preg_match( '/(wikipedia|wiktionary|wikisource|wikiquote|wikibooks|wikimedia)\.org/', $url ) ) {
				$local = 1;
			} else {
				$local = 0;
			}

			if ( empty( $reserved[$prefix] ) ) {
				$iwArray[$prefix] = array( "iw_prefix" => $prefix, "iw_url" => $url, "iw_local" => $local );
			}
		}
	}

	$sql = "-- Generated by rebuildInterwiki.php";


	foreach ( $dblist as $db ) {
		if ( isset( $specials[$db] ) ) {
			# Special wiki
			# Has interwiki links and interlanguage links to wikipedia

			$host = $specials[$db];
			$sql .= "\n--$host\n\n";
			$sql .= "USE $db;\n" .
					"TRUNCATE TABLE interwiki;\n" .
					"INSERT INTO interwiki (iw_prefix, iw_url, iw_local) VALUES \n";
			$first = true;

			# Intermap links
			foreach ( $iwArray as $iwEntry ) {
				$sql .= makeLink( $iwEntry, $first, $db );
			}

			# Links to multilanguage sites
			foreach ( $sites as $targetSite ) {
				$sql .= makeLink( array( $targetSite->lateral, $targetSite->getURL( 'en' ), 1 ), $first, $db );
			}

			# Interlanguage links to wikipedia
			$sql .= makeLanguageLinks( $sites['wiki'], $first, $db );

			# Extra links
			foreach ( $extraLinks as $link ) {
				$sql .= makeLink( $link, $first, $db );
			}

			$sql .= ";\n";
		} else {
			# Find out which site this DB belongs to
			$site = false;
			foreach( $sites as $candidateSite ) {
				$suffix = $candidateSite->suffix;
				if ( preg_match( "/(.*)$suffix$/", $db, $matches ) ) {
					$site = $candidateSite;
					break;
				}
			}
			if ( !$site ) {
				print "Invalid database $db\n";
				continue;
			}
			$lang = $matches[1];
			$host = "$lang." . $site->url;
			$sql .= "\n--$host\n\n";

			$sql .= "USE $db;\n" .
					"TRUNCATE TABLE interwiki;\n" .
					"INSERT INTO interwiki (iw_prefix,iw_url,iw_local) VALUES\n";
			$first = true;

			# Intermap links
			foreach ( $iwArray as $iwEntry ) {
				# Suppress links with the same name as the site
				if ( ( $suffix == 'wiki' && $iwEntry['iw_prefix'] != 'wikipedia' ) ||
				  ( $suffix != 'wiki' && $suffix != $iwEntry['iw_prefix'] ) )
				{
					$sql .= makeLink( $iwEntry, $first, $db );
				}
			}

			# Lateral links
			foreach ( $sites as $targetSite ) {
				# Suppress link to self
				if ( $targetSite->suffix != $site->suffix ) {
					$sql .= makeLink( array( $targetSite->lateral, $targetSite->getURL( $lang ), 1 ), $first, $db );
				}
			}

			# Interlanguage links
			$sql .= makeLanguageLinks( $site, $first, $db );

			# w link within wikipedias
			# Other sites already have it as a lateral link
			if ( $site->suffix == "wiki" ) {
				$sql .= makeLink( array("w", "http://en.wikipedia.org/wiki/$1", 1), $first, $db );
			}

			# Extra links
			foreach ( $extraLinks as $link ){
				$sql .= makeLink( $link, $first, $db );
			}
			$sql .= ";\n\n";
		}
	}
	return $sql;
}

# ------------------------------------------------------------------------------------------

# Returns part of an INSERT statement, corresponding to all interlanguage links to a particular site
function makeLanguageLinks( &$site, &$first, $source ) {
	global $langlist, $languageAliases;

	$sql = "";

	# Actual languages with their own databases
	foreach ( $langlist as $targetLang ) {
		$sql .= makeLink( array( $targetLang, $site->getURL( $targetLang ), 1 ), $first, $source );
	}

	# Language aliases
	foreach ( $languageAliases as $alias => $lang ) {
		$sql .= makeLink( array( $alias, $site->getURL( $lang ), 1 ), $first, $source );
	}
	return $sql;
}

# Make SQL for a single link from an array
# $first is passed by reference: it is cleared after the first tuple so every
# later tuple is preceded by a comma
function makeLink( $entry, &$first, $source ) {
	global $prefixRewrites;

	if ( isset( $prefixRewrites[$source] ) && isset( $prefixRewrites[$source][$entry[0]] ) ) {
		$entry[0] = $prefixRewrites[$source][$entry[0]];
	}

	$sql = "";
	# Add comma
	if ( $first ) {
		$first = false;
	} else {
		$sql .= ",\n";
	}
	$dbr =& wfGetDB( DB_SLAVE );
	$sql .= "(" . $dbr->makeList( $entry ) . ")";
	return $sql;
}

?>
diff --git a/maintenance/rebuildInterwiki.php b/maintenance/rebuildInterwiki.php new file mode 100644 index 00000000..19e081ad --- /dev/null +++ b/maintenance/rebuildInterwiki.php @@ -0,0 +1,31 @@
<?php
/**
 * Rebuild interwiki table using the file on meta and the language list
 * Wikimedia specific!
 * @todo document
 * @package MediaWiki
 * @subpackage Maintenance
 */

/** */
$oldCwd = getcwd();

$optionsWithArgs = array( "o" );
include_once( "commandLine.inc" );
include_once( "rebuildInterwiki.inc" );
chdir( $oldCwd );

$sql = getRebuildInterwikiSQL();

# Output
if ( isset( $options['o'] ) ) {
	# To file specified with -o
	$file = fopen( $options['o'], "w" );
	if ( $file === false ) {
		# fopen() failed; fwrite()/fclose() on false would only emit warnings
		wfDie( "Unable to open {$options['o']} for writing\n" );
	}
	fwrite( $file, $sql );
	fclose( $file );
} else {
	# To stdout
	print $sql;
}

?>
diff --git a/maintenance/rebuildMessages.php b/maintenance/rebuildMessages.php new file mode 100644 index 00000000..d009098d --- /dev/null +++ b/maintenance/rebuildMessages.php @@ -0,0 +1,66 @@
<?php
/**
 * Update or rebuild the MediaWiki message namespace from the command line.
 *
 * @todo document
 * @package MediaWiki
 * @subpackage Maintenance
 */

/** */
$options = array( 'update' => null, 'rebuild' => null );
require_once( "commandLine.inc" );
include_once( "InitialiseMessages.inc" );

$wgTitle = Title::newFromText( "Rebuild messages script" );

# $response: 0 = show usage, 1 = update, 2 = rebuild
if ( isset( $args[0] ) ) {
	# Retain script compatibility
	$response = array_shift( $args );
	if ( $response == "update" ) {
		$response = 1;
	} elseif ( $response == "rebuild" ) {
		$response = 2;
	}
} else {
	$response = 0;
}
if ( isset( $args[0] ) ) {
	$messages = loadLanguageFile( array_shift( $args ) );
} else {
	$messages = false;
}
if( isset( $options['update'] ) ) $response = 1;
if( isset( $options['rebuild'] ) ) $response = 2;

if ( $response == 0 ) {
	$dbr =& wfGetDB( DB_SLAVE );
	$row = $dbr->selectRow( "page", array("count(*) as c"), array("page_namespace" => NS_MEDIAWIKI) );
	print "Current namespace size: {$row->c}\n";

	print <<<END
Usage: php rebuildMessages.php <action> [filename]

Action must be one of:
 --update Update messages to include latest additions to MessagesXX.php
 --rebuild Delete all messages and reinitialise namespace

If a message dump file is given, messages will be read from it to supplement
the defaults in MediaWiki's Language*.php. The file should contain a serialized
PHP associative array, as produced by dumpMessages.php.


END;
	exit(0);
}

switch ( $response ) {
	case 1:
		initialiseMessages( false, $messages );
		break;
	case 2:
		initialiseMessages( true, $messages );
		break;
}

exit();

?>
diff --git a/maintenance/rebuildall.php b/maintenance/rebuildall.php new file mode 100644 index 00000000..7c44e300 --- /dev/null +++ b/maintenance/rebuildall.php @@ -0,0 +1,39 @@
<?php
/**
 * Rebuild link tracking tables from scratch. This takes several
 * hours, depending on the database size and server configuration.
 * @todo document
 * @package MediaWiki
 * @subpackage Maintenance
 */

/** */
require_once( "commandLine.inc" );

#require_once( "rebuildlinks.inc" );
require_once( "refreshLinks.inc" );
require_once( "rebuildtextindex.inc" );
require_once( "rebuildrecentchanges.inc" );

$database = Database::newFromParams( $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname );

print "** Rebuilding fulltext search index (if you abort this will break searching; run this script again to fix):\n";
dropTextIndex( $database );
rebuildTextIndex( $database );
createTextIndex( $database );

print "\n\n** Rebuilding recentchanges table:\n";
rebuildRecentChangesTablePass1();
rebuildRecentChangesTablePass2();

# Doesn't work anymore
# rebuildLinkTables();

# Use the slow incomplete one instead. It's designed to work in the background
print "\n\n** Rebuilding links tables -- this can take a long time. It should be safe to abort via ctrl+C if you get bored.\n";
refreshLinks( 1 );

print "Done.\n";
exit();

?>
diff --git a/maintenance/rebuildrecentchanges.inc b/maintenance/rebuildrecentchanges.inc new file mode 100644 index 00000000..e077da52 --- /dev/null +++ b/maintenance/rebuildrecentchanges.inc @@ -0,0 +1,97 @@
<?php
/**
 * Rebuild recent changes table.
 *
 * @todo document
 * @package MediaWiki
 * @subpackage Maintenance
 */

/**
 * Pass 1: empty recentchanges and refill it from the page and revision
 * tables, limited to revisions newer than $wgRCMaxAge seconds and to
 * 5000 rows.
 */
function rebuildRecentChangesTablePass1()
{
	$fname = 'rebuildRecentChangesTablePass1';
	$dbw =& wfGetDB( DB_MASTER );
	# The original extracted the names of recentchanges, cur and old here
	# and never used them

	$dbw->delete( 'recentchanges', '*' );

	print( "Loading from page and revision tables...\n" );

	global $wgRCMaxAge;
	$cutoff = time() - $wgRCMaxAge;
	$dbw->insertSelect( 'recentchanges', array( 'page', 'revision' ),
		array(
			'rc_timestamp'  => 'rev_timestamp',
			'rc_cur_time'   => 'rev_timestamp',
			'rc_user'       => 'rev_user',
			'rc_user_text'  => 'rev_user_text',
			'rc_namespace'  => 'page_namespace',
			'rc_title'      => 'page_title',
			'rc_comment'    => 'rev_comment',
			'rc_minor'      => 'rev_minor_edit',
			'rc_bot'        => 0,
			'rc_new'        => 'page_is_new',
			'rc_cur_id'     => 'page_id',
			'rc_this_oldid' => 'rev_id',
			'rc_last_oldid' => 0, // is this ok?
			'rc_type'       => $dbw->conditional( 'page_is_new != 0', RC_NEW, RC_EDIT ),
		), array(
			'rev_timestamp > ' . $dbw->addQuotes( $dbw->timestamp( $cutoff ) ),
			'rev_page=page_id'
		), $fname,
		array(), // INSERT options
		array( 'ORDER BY' => 'rev_timestamp', 'LIMIT' => 5000 ) // SELECT options
	);
}

/**
 * Pass 2: fill in rc_last_oldid (the previous revision of the same page)
 * and set rc_new / rc_type for every row loaded by pass 1.
 */
function rebuildRecentChangesTablePass2()
{
	$fname = 'rebuildRecentChangesTablePass2';
	$dbw =& wfGetDB( DB_MASTER );
	extract( $dbw->tableNames( 'recentchanges', 'revision' ) );

	print( "Updating links...\n" );

	# Fill in the rc_last_oldid field, which points to the previous edit
	#
	# The second argument of query() is the caller name; the original passed
	# the DB_MASTER constant there
	$sql = "SELECT rc_cur_id,rc_this_oldid,rc_timestamp FROM $recentchanges " .
		"ORDER BY rc_cur_id,rc_timestamp";
	$res = $dbw->query( $sql, $fname );

	$lastCurId = 0;
	$lastOldId = 0;
	while ( $obj = $dbw->fetchObject( $res ) ) {
		$new = 0;
		if( $obj->rc_cur_id != $lastCurId ) {
			# Switch! Look up the previous last edit, if any
			$lastCurId = intval( $obj->rc_cur_id );
			$emit = $dbw->addQuotes( $obj->rc_timestamp );
			$sql2 = "SELECT rev_id FROM $revision " .
				"WHERE rev_page={$lastCurId} ".
				"AND rev_timestamp<{$emit} ORDER BY rev_timestamp DESC LIMIT 1";
			$res2 = $dbw->query( $sql2, $fname );
			if( $row = $dbw->fetchObject( $res2 ) ) {
				$lastOldId = intval( $row->rev_id );
			} else {
				# No previous edit
				$lastOldId = 0;
				$new = 1;
			}
			$dbw->freeResult( $res2 );
		}
		if( $lastCurId == 0 ) {
			print "Uhhh, something wrong? No curid\n";
		} else {
			# The revision just handled becomes the "previous" one for the
			# next row of the same page
			$thisOldId = intval( $obj->rc_this_oldid );
			$sql3 = "UPDATE $recentchanges SET rc_last_oldid=$lastOldId,rc_new=$new,rc_type=$new " .
				"WHERE rc_cur_id={$lastCurId} AND rc_this_oldid={$thisOldId}";
			$dbw->query( $sql3, $fname );
			$lastOldId = $thisOldId;
		}
	}
	$dbw->freeResult( $res );
}

?>
diff --git a/maintenance/rebuildrecentchanges.php b/maintenance/rebuildrecentchanges.php new file mode 100644 index 00000000..77816cf8 --- /dev/null +++ b/maintenance/rebuildrecentchanges.php @@ -0,0 +1,25 @@ +<?php +/** + * Rebuild link tracking tables from scratch. This takes several + * hours, depending on the database size and server configuration.
+ *
+ * @todo document
+ * @package MediaWiki
+ * @subpackage Maintenance
+ */
+
+/** */
+require_once( "commandLine.inc" );
+require_once( "rebuildrecentchanges.inc" );
+$wgTitle = Title::newFromText( "Rebuild recent changes script" );
+
+$wgDBuser = $wgDBadminuser;
+$wgDBpassword = $wgDBadminpassword;
+
+rebuildRecentChangesTablePass1();
+rebuildRecentChangesTablePass2();
+
+print "Done.\n";
+exit();
+
+?>
diff --git a/maintenance/rebuildtextindex.inc b/maintenance/rebuildtextindex.inc
new file mode 100644
index 00000000..5035b564
--- /dev/null
+++ b/maintenance/rebuildtextindex.inc
@@ -0,0 +1,68 @@
+<?php
+require_once 'counter.php';
+/**
+ * Rebuild the fulltext search indexes. This may take a while
+ * depending on the database size and server configuration.
+ *
+ * Rebuilding is faster if you drop the index and recreate it,
+ * but that will prevent searches from working while it runs.
+ *
+ * @package MediaWiki
+ * @subpackage Maintenance
+ */
+
+/** Number of page ids handled per SELECT in rebuildTextIndex() */
+define( "RTI_CHUNK_SIZE", 500 );
+
+/**
+ * Drop the si_title and si_text indexes of the searchindex table.
+ * Nothing is done when the si_title index does not exist.
+ *
+ * @param $database Database connection used to run the ALTER TABLE
+ */
+function dropTextIndex( &$database )
+{
+	$searchindex = $database->tableName( 'searchindex' );
+	if ( $database->indexExists( "searchindex", "si_title" ) ) {
+		echo "Dropping index...\n";
+		$sql = "ALTER TABLE $searchindex DROP INDEX si_title, DROP INDEX si_text";
+		$database->query($sql, "dropTextIndex" );
+	}
+}
+
+/**
+ * Create the si_title and si_text FULLTEXT indexes on the searchindex table.
+ *
+ * @param $database Database connection used to run the ALTER TABLE
+ */
+function createTextIndex( &$database )
+{
+	$searchindex = $database->tableName( 'searchindex' );
+	echo "\nRebuild the index...\n";
+	$sql = "ALTER TABLE $searchindex ADD FULLTEXT si_title (si_title), " .
+		"ADD FULLTEXT si_text (si_text)";
+	$database->query($sql, "createTextIndex" );
+}
+
+/**
+ * Re-run the search update for the current revision of every page.
+ *
+ * Pages are read in page_id ranges of RTI_CHUNK_SIZE, from 0 up to and
+ * including the highest page_id in the page table.
+ *
+ * @param $database Database connection used for the SELECTs
+ */
+function rebuildTextIndex( &$database )
+{
+	extract( $database->tableNames( 'page', 'revision', 'text', 'searchindex' ) );
+
+	# Note: this is the highest page id, which is only an upper bound
+	# for the number of pages
+	$sql = "SELECT MAX(page_id) AS count FROM $page";
+	$res = $database->query($sql, "rebuildTextIndex" );
+	$s = $database->fetchObject($res);
+	$database->freeResult( $res );
+	$count = $s->count;
+	echo "Rebuilding index fields for {$count} pages...\n";
+	$n = 0;
+
+	# "<=" rather than "<": with "<" the page with the highest id was never
+	# indexed when that id was an exact multiple of RTI_CHUNK_SIZE
+	while ( $n <= $count ) {
+		print_c( $n - 1, $n);
+		$end = $n + RTI_CHUNK_SIZE - 1;
+		$sql = "SELECT page_id, page_namespace, page_title, old_flags, old_text
+			FROM $page, $revision, $text
+			WHERE page_id BETWEEN $n AND $end
+			AND page_latest=rev_id
+			AND rev_text_id=old_id";
+		$res = $database->query($sql, "rebuildTextIndex" );
+
+		while( $s = $database->fetchObject($res) ) {
+			$revtext = Revision::getRevisionText( $s );
+			$u = new SearchUpdate( $s->page_id, $s->page_title, $revtext );
+			$u->doUpdate();
+		}
+		$database->freeResult( $res );
+		$n += RTI_CHUNK_SIZE;
+	}
+}
+
+?>
diff --git a/maintenance/rebuildtextindex.php b/maintenance/rebuildtextindex.php
new file mode 100644
index 00000000..54672d21
--- /dev/null
+++ b/maintenance/rebuildtextindex.php
@@ -0,0 +1,25 @@
+<?php
+/**
+ * Rebuild search index table from scratch. This takes several
+ * hours, depending on the database size and server configuration.
+ *
+ * @todo document
+ * @package MediaWiki
+ * @subpackage Maintenance
+ */
+
+/** */
+require_once( "commandLine.inc" );
+require_once( "rebuildtextindex.inc" );
+$wgTitle = Title::newFromText( "Rebuild text index script" );
+
+$database = Database::newFromParams( $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname );
+
+dropTextIndex( $database );
+rebuildTextIndex( $database );
+createTextIndex( $database );
+
+print "Done.\n";
+exit();
+
+?>
diff --git a/maintenance/recount.sql b/maintenance/recount.sql
new file mode 100644
index 00000000..d9fec31f
--- /dev/null
+++ b/maintenance/recount.sql
@@ -0,0 +1,8 @@
+--
+-- Recalculate the article count
+--
+
+SELECT @foo:=COUNT(*) FROM /*$wgDBprefix*/cur
+  WHERE cur_namespace=0 AND cur_is_redirect=0 AND cur_text like '%[[%';
+UPDATE /*$wgDBprefix*/site_stats SET ss_good_articles=@foo, ss_total_pages=-1, ss_users=-1, ss_admins=-1;
+
diff --git a/maintenance/redundanttrans.php b/maintenance/redundanttrans.php
new file mode 100644
index 00000000..de096863
--- /dev/null
+++ b/maintenance/redundanttrans.php
@@ -0,0 +1,28 @@
+<?php
+/**
+ * Prints out messages that are no longer used.
+ *
+ * A message of the configured language counts as redundant when its key
+ * is not present in $wgAllMessagesEn.
+ *
+ * @package MediaWiki
+ * @subpackage Maintenance
+ */
+
+require_once('commandLine.inc');
+
+if ( 'en' == $wgLanguageCode ) {
+	print "Current selected language is English. Cannot check translations.\n";
+	exit();
+}
+
+$count = $total = 0;
+# Name of the global holding the messages of the configured language
+$msgarray = 'wgAllMessages' . ucfirst( $wgLanguageCode );
+
+if ( !isset( $$msgarray ) || !is_array( $$msgarray ) ) {
+	# Nothing to iterate over; say so instead of failing inside foreach
+	print "No message array \${$msgarray} is defined for language '{$wgLanguageCode}'.\n";
+	exit();
+}
+
+foreach ( $$msgarray as $code => $msg ) {
+	++$total;
+	if ( ! array_key_exists( $code, $wgAllMessagesEn ) ) {
+		print "* $code\n";
+		++$count;
+	}
+}
+
+print "{$count} messages of {$total} are redundant\n";
+?>
diff --git a/maintenance/refreshImageCount.php b/maintenance/refreshImageCount.php
new file mode 100644
index 00000000..15ce2b91
--- /dev/null
+++ b/maintenance/refreshImageCount.php
@@ -0,0 +1,25 @@
+<?php
+
+// Quickie hack; patch-ss_images.sql uses variables which don't
+// replicate properly.
+
+require_once( "commandLine.inc" );
+
+$dbw =& wfGetDB( DB_MASTER );
+
+// Load the current value from the master
+$count = $dbw->selectField( 'site_stats', 'ss_images' );
+
+echo "$wgDBname: forcing ss_images to $count\n";
+
+// First set to NULL so that it changes on the master
+$dbw->update( 'site_stats',
+	array( 'ss_images' => null ),
+	array( 'ss_row_id' => 1 ) );
+
+// Now this update will be forced to go out
+$dbw->update( 'site_stats',
+	array( 'ss_images' => $count ),
+	array( 'ss_row_id' => 1 ) );
+
+?>
\ No newline at end of file
diff --git a/maintenance/refreshLinks.inc b/maintenance/refreshLinks.inc
new file mode 100644
index 00000000..34ea6294
--- /dev/null
+++ b/maintenance/refreshLinks.inc
@@ -0,0 +1,131 @@
+<?php
+/**
+ * Support functions for refreshing the link tables.
+ *
+ * @package MediaWiki
+ * @subpackage Maintenance
+ */
+
+/** Print progress and wait for the slaves every this many pages */
+define( "REPORTING_INTERVAL", 100 );
+#define( "REPORTING_INTERVAL", 1 );
+
+/**
+ * Re-parse pages and update their link tables.
+ *
+ * @param $start   First page_id to process (inclusive)
+ * @param $newOnly When true, only pages with page_is_new = 1 are processed
+ * @param $maxLag  Passed unchanged to wfWaitForSlaves()
+ * @param $end     Last page_id to process (inclusive); when 0 the highest
+ *                 page_id is used. Ignored when $newOnly is set.
+ */
+function refreshLinks( $start, $newOnly = false, $maxLag = false, $end = 0 ) {
+	global $wgUser, $wgParser, $wgUseImageResize, $wgUseTidy;
+
+	$fname = 'refreshLinks';
+	$dbr =& wfGetDB( DB_SLAVE );
+	$start = intval( $start );
+
+	# Don't generate TeX PNGs (lack of a sensible current directory causes errors anyway)
+	$wgUser->setOption('math', MW_MATH_SOURCE);
+
+	# Don't generate extension images (e.g. Timeline)
+	$wgParser->mTagHooks = array();
+
+	# Don't generate thumbnail images
+	$wgUseImageResize = false;
+	$wgUseTidy = false;
+
+	if ( $newOnly ) {
+		print "Refreshing links from ";
+		# ">=" so that $start itself is included, as in the branch below
+		$res = $dbr->select( 'page',
+			array( 'page_id' ),
+			array(
+				'page_is_new' => 1,
+				"page_id >= $start" ),
+			$fname
+		);
+		$num = $dbr->numRows( $res );
+		print "$num new articles...\n";
+
+		$i = 0;
+		while ( $row = $dbr->fetchObject( $res ) ) {
+			if ( !( ++$i % REPORTING_INTERVAL ) ) {
+				print "$i\n";
+				wfWaitForSlaves( $maxLag );
+			}
+
+			fixLinksFromArticle( $row->page_id );
+		}
+		$dbr->freeResult( $res );
+	} else {
+		print "Refreshing link table.\n";
+		if ( !$end ) {
+			$end = $dbr->selectField( 'page', 'max(page_id)', false );
+		}
+		print("Starting from page_id $start of $end.\n");
+
+		for ($id = $start; $id <= $end; $id++) {
+
+			if ( !($id % REPORTING_INTERVAL) ) {
+				print "$id\n";
+				wfWaitForSlaves( $maxLag );
+			}
+			fixLinksFromArticle( $id );
+		}
+	}
+}
+
+/**
+ * Re-parse the current revision of one page and update its link tables.
+ *
+ * Sets the global $wgTitle to the page's title and clears the link cache.
+ * Page ids without a title or without a revision are skipped.
+ *
+ * @param $id page_id of the page to process
+ */
+function fixLinksFromArticle( $id ) {
+	global $wgTitle, $wgParser;
+
+	$wgTitle = Title::newFromID( $id );
+	$dbw =& wfGetDB( DB_MASTER );
+
+	$linkCache =& LinkCache::singleton();
+	$linkCache->clear();
+
+	if ( is_null( $wgTitle ) ) {
+		return;
+	}
+	$dbw->begin();
+
+	$revision = Revision::newFromTitle( $wgTitle );
+	if ( !$revision ) {
+		# Don't leave the transaction opened above dangling
+		$dbw->immediateCommit();
+		return;
+	}
+
+	$options = new ParserOptions;
+	$parserOutput = $wgParser->parse( $revision->getText(), $wgTitle, $options, true, true, $revision->getId() );
+	$update = new LinksUpdate( $wgTitle, $parserOutput, false );
+	$update->doUpdate();
+	$dbw->immediateCommit();
+}
+
+/**
+ * Delete rows from the link tables whose source page no longer exists.
+ *
+ * @param $maxLag Passed unchanged to wfWaitForSlaves()
+ */
+function deleteLinksFromNonexistent( $maxLag = 0 ) {
+	$fname = 'deleteLinksFromNonexistent';
+
+	wfWaitForSlaves( $maxLag );
+
+	$dbw =& wfGetDB( DB_WRITE );
+
+	# Link table => column holding the id of the linking page
+	$linksTables = array(
+		'pagelinks' => 'pl_from',
+		'imagelinks' => 'il_from',
+		'categorylinks' => 'cl_from',
+		'templatelinks' => 'tl_from',
+		'externallinks' => 'el_from',
+	);
+
+	$page = $dbw->tableName( 'page' );
+
+
+	foreach ( $linksTables as $table => $field ) {
+		# Block until the connection answers again before the next DELETE
+		if ( !$dbw->ping() ) {
+			print "DB disconnected, reconnecting...";
+			while ( !$dbw->ping() ) {
+				print ".";
+				sleep(10);
+			}
+			print "\n";
+		}
+
+		$pTable = $dbw->tableName( $table );
+		$sql = "DELETE $pTable FROM $pTable LEFT JOIN $page ON page_id=$field WHERE page_id IS NULL";
+
+		print "Deleting $table from non-existent articles...";
+		$dbw->query( $sql, $fname );
+		print " fixed " .$dbw->affectedRows() . " row(s)\n";
+	}
+}
+
+?>
diff --git a/maintenance/refreshLinks.php b/maintenance/refreshLinks.php
new file mode 100644
index 00000000..e59124aa
--- /dev/null
+++ b/maintenance/refreshLinks.php
@@ -0,0 +1,32 @@
+<?php
+/**
+ * Command line front end for refreshLinks().
+ *
+ * Options read here: --dfn-only, --new-only, -m, -e, --globals; the first
+ * positional argument is the page_id to start from (default 1).
+ *
+ * @package MediaWiki
+ * @subpackage Maintenance
+ */
+
+/** */
+$optionsWithArgs = array( 'm', 'e' );
+require_once( "commandLine.inc" );
+require_once( "refreshLinks.inc" );
+
+error_reporting( E_ALL & (~E_NOTICE) );
+
+# Options and arguments that were not given are simply absent, so test for
+# presence instead of reading undefined indexes
+if ( empty( $options['dfn-only'] ) ) {
+	$start = empty( $args[0] ) ? 1 : (int)$args[0];
+	$newOnly = !empty( $options['new-only'] );
+	$maxLag = isset( $options['m'] ) ? $options['m'] : false;
+	$end = isset( $options['e'] ) ? $options['e'] : 0;
+
+	refreshLinks( $start, $newOnly, $maxLag, $end );
+}
+// this bit's bad for replication: disabling temporarily
+// --brion 2005-07-16
+//deleteLinksFromNonexistent();
+
+if ( !empty( $options['globals'] ) ) {
+	print_r( $GLOBALS );
+}
+
+?>
diff --git a/maintenance/removeUnusedAccounts.inc b/maintenance/removeUnusedAccounts.inc
new file mode 100644
index 00000000..ac15ebef
--- /dev/null
+++ b/maintenance/removeUnusedAccounts.inc
@@ -0,0 +1,47 @@
+<?php
+
+/**
+ * Support functions for the removeUnusedAccounts maintenance script
+ *
+ *
+ * @package MediaWiki
+ * @subpackage Maintenance
+ * @author Rob Church <robchur@gmail.com>
+ */
+
+/**
+ * Could the specified user account be deemed inactive?
+ * (No edits, no deleted edits, no log entries, no current/old uploads)
+ *
+ * @param $id User's ID
+ * @param $master Perform checking on the master
+ * @return bool
+ */
+function isInactiveAccount( $id, $master = false ) {
+	$dbo =& wfGetDB( $master ? DB_MASTER : DB_SLAVE );
+	$fname = 'isInactiveAccount';
+	# Table => prefix of its "<prefix>_user" column
+	$checks = array( 'revision' => 'rev', 'archive' => 'ar', 'logging' => 'log',
+		'image' => 'img', 'oldimage' => 'oi' );
+	$count = 0;
+
+	$dbo->immediateBegin();
+	foreach( $checks as $table => $fprefix ) {
+		$conds = array( $fprefix . '_user' => $id );
+		$count += (int)$dbo->selectField( $table, 'COUNT(*)', $conds, $fname );
+	}
+	$dbo->immediateCommit();
+
+	return $count == 0;
+}
+
+/**
+ * Show help for the maintenance script
+ */
+function showHelp() {
+	echo( "Delete unused user accounts from the database.\n\n" );
+	echo( "USAGE: php removeUnusedAccounts.php [--delete]\n\n" );
+	echo( " --delete : Delete accounts which are discovered to be inactive\n" );
+	echo( "\n" );
+}
+
+?>
\ No newline at end of file
diff --git a/maintenance/removeUnusedAccounts.php b/maintenance/removeUnusedAccounts.php
new file mode 100644
index 00000000..33b9a0c1
--- /dev/null
+++ b/maintenance/removeUnusedAccounts.php
@@ -0,0 +1,58 @@
+<?php
+
+/**
+ * Remove unused user accounts from the database
+ * An unused account is one which has made no edits
+ *
+ * @package MediaWiki
+ * @subpackage Maintenance
+ * @author Rob Church <robchur@gmail.com>
+ */
+
+/**
+ * @todo Don't delete sysops or bureaucrats
+ */
+
+$options = array( 'help', 'delete' );
+require_once( 'commandLine.inc' );
+require_once( 'removeUnusedAccounts.inc' );
+echo( "Remove Unused Accounts\n\n" );
+$fname = 'removeUnusedAccounts';
+
+if( isset( $options['help'] ) ) {
+	showHelp();
+	exit();
+}
+
+# Do an initial scan for inactive accounts and report the result
+echo( "Checking for unused user accounts...\n" );
+$del = array();
+$dbr =& wfGetDB( DB_SLAVE );
+$res = $dbr->select( 'user', array( 'user_id', 'user_name' ), '', $fname );
+while( $row = $dbr->fetchObject( $res ) ) {
+	# Check the account, but ignore it if it's the primary administrator
+	if( $row->user_id > 1 && isInactiveAccount( $row->user_id, true ) ) {
+		# Inactive; print out the name and flag it
+		$del[] = $row->user_id;
+		echo( $row->user_name . "\n" );
+	}
+}
+$dbr->freeResult( $res );
+$count = count( $del );
+echo( "...found {$count}.\n" );
+
+# If required, go back and delete each marked account
+if( $count > 0 && isset( $options['delete'] ) ) {
+	echo( "\nDeleting inactive accounts..." );
+	$dbw =& wfGetDB( DB_MASTER );
+	$dbw->delete( 'user', array( 'user_id' => $del ), $fname );
+	echo( "done.\n" );
+	# Update the site_stats.ss_users field
+	$users = $dbw->selectField( 'user', 'COUNT(*)', array(), $fname );
+	$dbw->update( 'site_stats', array( 'ss_users' => $users ), array( 'ss_row_id' => 1 ), $fname );
+} else {
+	if( $count > 0 )
+		echo( "\nRun the script again with --delete to remove them from the database.\n" );
+}
+echo( "\n" );
+
+?>
diff --git a/maintenance/renderDump.php b/maintenance/renderDump.php
new file mode 100644
index 00000000..10986f2c
--- /dev/null
+++ b/maintenance/renderDump.php
@@ -0,0 +1,103 @@
+<?php
+/**
+ * Take page text out of an XML dump file and render basic HTML out to files.
+ * This is *NOT* suitable for publishing or offline use; it's intended for
+ * running comparative tests of parsing behavior using real-world data.
+ *
+ * Templates etc are pulled from the local wiki database, not from the dump.
+ *
+ * Copyright (C) 2006 Brion Vibber <brion@pobox.com>
+ * http://www.mediawiki.org/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @package MediaWiki
+ * @subpackage Maintenance
+ */
+
+$optionsWithArgs = array( 'report' );
+
+require_once( 'commandLine.inc' );
+require_once( 'SpecialImport.php' );
+
+/**
+ * Reads an XML dump from stdin and writes one HTML file per revision
+ * into the output directory.
+ */
+class DumpRenderer {
+	/**
+	 * @param $dir Directory the HTML files are written to
+	 */
+	function __construct( $dir ) {
+		$this->stderr = fopen( "php://stderr", "wt" );
+		$this->outputDirectory = $dir;
+		$this->count = 0;
+	}
+
+	/**
+	 * Revision callback for the importer: parse the revision text and
+	 * write it to "wiki-<counter>-<url-encoded title>.html".
+	 *
+	 * @param $rev Revision handed over by the importer
+	 */
+	function handleRevision( $rev ) {
+		$title = $rev->getTitle();
+		if (!$title) {
+			# Terminate the line so the next progress message starts on its own
+			fprintf( $this->stderr, "Got bogus revision with null title!\n" );
+			return;
+		}
+		$display = $title->getPrefixedText();
+
+		$this->count++;
+
+		# URL-encode the title so it is usable as part of a file name
+		$sanitized = rawurlencode( $display );
+		$filename = sprintf( "%s/wiki-%07d-%s.html",
+			$this->outputDirectory,
+			$this->count,
+			$sanitized );
+		# Progress goes to stderr; the format has a single placeholder
+		fprintf( $this->stderr, "%s\n", $filename );
+
+		// fixme
+		$user = new User();
+		$parser = new Parser();
+		$options = ParserOptions::newFromUser( $user );
+
+		$output = $parser->parse( $rev->getText(), $title, $options );
+
+		file_put_contents( $filename,
+			"<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" " .
+			"\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n" .
+			"<html xmlns=\"http://www.w3.org/1999/xhtml\">\n" .
+			"<head>\n" .
+			"<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />\n" .
+			"<title>" . htmlspecialchars( $display ) . "</title>\n" .
+			"</head>\n" .
+			"<body>\n" .
+			$output->getText() .
+			"</body>\n" .
+			"</html>" );
+	}
+
+	/**
+	 * Import the dump from stdin, calling handleRevision() per revision.
+	 *
+	 * @return The result of WikiImporter::doImport()
+	 */
+	function run() {
+		$this->startTime = wfTime();
+
+		$file = fopen( 'php://stdin', 'rt' );
+		$source = new ImportStreamSource( $file );
+		$importer = new WikiImporter( $source );
+
+		$importer->setRevisionCallback(
+			array( &$this, 'handleRevision' ) );
+
+		return $importer->doImport();
+	}
+}
+
+if( isset( $options['output-dir'] ) ) {
+	$dir = $options['output-dir'];
+} else {
+	wfDie( "Must use --output-dir=/some/dir\n" );
+}
+$render = new DumpRenderer( $dir );
+$render->run();
+
+?>
diff --git a/maintenance/runJobs.php b/maintenance/runJobs.php
new file mode 100644
index 00000000..d72addc7
--- /dev/null
+++ b/maintenance/runJobs.php
@@ -0,0 +1,20 @@
+<?php
+
+require_once( 'commandLine.inc' );
+require_once( "$IP/includes/JobQueue.php" );
+require_once( "$IP/includes/FakeTitle.php" );
+
+// Trigger errors on inappropriate use of $wgTitle
+$wgTitle = new FakeTitle;
+
+$dbw =& wfGetDB( DB_MASTER );
+// Keep going until the job table is empty; the count is re-checked after
+// each drain of the queue
+while ( $dbw->selectField( 'job', 'count(*)', '', 'runJobs.php' ) ) {
+	while ( false != ($job = Job::pop()) ) {
+		wfWaitForSlaves( 5 );
+		print $job->id . " " . $job->toString() . "\n";
+		if ( !$job->run() ) {
+			print "Error: {$job->error}\n";
+		}
+	}
+}
+?>
diff --git a/maintenance/showJobs.php b/maintenance/showJobs.php
new file mode 100644
index 00000000..98e47de2
--- /dev/null
+++ b/maintenance/showJobs.php
@@ -0,0 +1,19 @@
+<?php
+/**
+ * Print the number of rows in the job table.
+ * Based on runJobs.php
+ *
+ * @author Tim Starling
+ * @author Ashar Voultoiz
+ */
+require_once( 'commandLine.inc' );
+require_once( "$IP/includes/JobQueue.php" );
+require_once( "$IP/includes/FakeTitle.php" );
+
+// Trigger errors on inappropriate use of $wgTitle
+$wgTitle = new FakeTitle;
+
+$dbw =& wfGetDB( DB_MASTER );
+// Tag the query with this script's own name
+$count = $dbw->selectField( 'job', 'count(*)', '', 'showJobs.php' );
+print $count."\n";
+
+?>
diff --git a/maintenance/splitLanguageFiles.inc b/maintenance/splitLanguageFiles.inc
new file mode 100644
index 00000000..c2500778
--- /dev/null
+++ b/maintenance/splitLanguageFiles.inc
@@ -0,0 +1,1168 @@
+<?php
+/**
+ * This is an experimental list. It will later be used with a script to split
+ * the languages files in several parts then the message system will only load
+ * in memory the parts which are actually needed.
+ *
+ * Generated using: grep -r foobar *
+ *
+ * $commonMsg is the default array. Other arrays will only be loaded if needed.
+ */ +$installerMsg = array ( +'mainpagetext', +'mainpagedocfooter', +); + +$ActionMsg = array ( +'delete' => array( + 'delete', + 'deletethispage', + 'undelete_short1', + 'undelete_short', + 'undelete', + 'undeletepage', + 'undeletepagetext', + 'undeletearticle', + 'undeleterevisions', + 'undeletehistory', + 'undeleterevision', + 'undeletebtn', + 'undeletedarticle', + 'undeletedrevisions', + 'undeletedtext', + ), +'move' => array( + 'move', + 'movethispage', +), +'revert' => array( + +), +'protect' => array( + 'confirmprotect', + 'confirmprotecttext', + 'confirmunprotect', + 'confirmunprotecttext', + 'protect', + 'protectcomment', + 'protectmoveonly', + 'protectpage', + 'protectsub', + 'protectthispage', + 'unprotect', + 'unprotectthispage', + 'unprotectsub', + 'unprotectcomment', +), +); + +$CreditsMsg = array( +'anonymous', +'siteuser', +'lastmodifiedby', +'and', +'othercontribs', +'others', +'siteusers', +'creditspage', +'nocredits', +); + +// When showing differences +$DifferenceMsg = array( +'previousdiff', +'nextdiff', +); + +// used on page edition +$EditMsg = array( +'bold_sample', +'bold_tip', +'italic_sample', +'italic_tip', +'link_sample', +'link_tip', +'extlink_sample', +'extlink_tip', +'headline_sample', +'headline_tip', +'math_sample', +'math_tip', +'nowiki_sample', +'nowiki_tip', +'image_sample', +'image_tip', +'media_sample', +'media_tip', +'sig_tip', +'hr_tip', + +'accesskey-search', +'accesskey-minoredit', +'accesskey-save', +'accesskey-preview', +'accesskey-diff', +'accesskey-compareselectedversions', +'tooltip-search', +'tooltip-minoredit', +'tooltip-save', +'tooltip-preview', +'tooltip-diff', +'tooltip-compareselectedversions', +'tooltip-watch', + +'copyrightwarning', +'copyrightwarning2', +'editconflict', +'editing', +'editingcomment', +'editingold', +'editingsection', +'explainconflict', +'infobox', +'infobox_alert', +'longpagewarning', +'nonunicodebrowser', +'previewconflict', +'previewnote', +'protectedpagewarning', +'readonlywarning', 
+'spamprotectiontitle', +'spamprotectiontext', +'spamprotectionmatch', +'templatesused', +'yourdiff', +'yourtext', +); + +// Per namespace +$NamespaceCategory = array ( +'category_header', +'categoryarticlecount', +'categoryarticlecount1', +'listingcontinuesabbrev', +'subcategories', +'subcategorycount', +'subcategorycount1', +'usenewcategorypage', +); + +$NamespaceImage = array ( +'deletedrevision', +'edit-externally', +'edit-externally-help', +'showbigimage', +); + +$NamespaceSpecialMsg = array( +'nosuchspecialpage', +'nospecialpagetext', +); + + + +// per special pages +$SpecialAllMessages = array( +'allmessages', +'allmessagesname', +'allmessagesdefault', +'allmessagescurrent', +'allmessagestext', +'allmessagesnotsupportedUI', +'allmessagesnotsupportedDB', +); + + +$SpecialAllPages = array( +'articlenamespace', +'allpagesformtext1', +'allpagesformtext2', +'allarticles', +'allpagesprev', +'allpagesnext', +'allpagesnamespace', +'allpagessubmit', +); + + +$SpecialAskSQLMsg = array( +'asksql', +'asksqltext', +'sqlislogged', +'sqlquery', +'querybtn', +'selectonly', +'querysuccessful', +); + +$SpecialBlockip = array( +'blockip', +'blockiptext', +'range_block_disabled', +'ipb_expiry_invalid', +'ip_range_invalid', +'ipbexpiry', +'ipbsubmit', +); + +$SpecialContributions = array( +'contribsub', +'contributionsall', +'newbies', +'nocontribs', +'ucnote', +'uclinks', +'uctop', +); + +$SpecialExportMsg = array ( +'export', +'exporttext', +'exportcuronly', +); + +$SpecialImagelist = array( +'imagelistall', +); + +$SpecialImportMsg = array ( +'import', +'importtext', +'importfailed', +'importnotext', +'importsuccess', +'importhistoryconflict', +); + +$SpecialLockdbMsg = array( +'lockdb', +'unlockdb', +'lockdbtext', +'unlockdbtext', +'lockconfirm', +'unlockconfirm', +'lockbtn', +'unlockbtn', +'locknoconfirm', +'lockdbsuccesssub', +'unlockdbsuccesssub', +'lockdbsuccesstext', +'unlockdbsuccesstext', +); + +$SpecialLogMsg = array( +'specialloguserlabel', +'speciallogtitlelabel', 
+); + +$SpecialMaintenance = array( +'maintenance', +'maintnancepagetext', +'maintenancebacklink', +'disambiguations', +'disambiguationspage', +'disambiguationstext', +'doubleredirects', +'doubleredirectstext', +'brokenredirects', +'brokenredirectstext', +'selflinks', +'selflinkstext', +'mispeelings', +'mispeelingstext', +'mispeelingspage', +'missinglanguagelinks', +'missinglanguagelinksbutton', +'missinglanguagelinkstext', +); + +$SpecialMakeSysopMsg = array ( +'already_bureaucrat', +'already_sysop', +'makesysop', +'makesysoptitle', +'makesysoptext', +'makesysopname', +'makesysopsubmit', +'makesysopok', +'makesysopfail', +'rights', +'set_rights_fail', +'set_user_rights', +'user_rights_set', +); + +$SpecialMovepageMsg = array( +'newtitle', +'movearticle', +'movenologin', +'movenologintext', +'movepage', +'movepagebtn', +'movepagetalktext', +'movepagetext', +'movetalk', +'pagemovedsub', +'pagemovedtext', +'talkexists', +'talkpagemoved', +'talkpagenotmoved', + +); + +$SpecialPreferencesMsg = array( +'tog-underline', +'tog-highlightbroken', +'tog-justify', +'tog-hideminor', +'tog-usenewrc', +'tog-numberheadings', +'tog-showtoolbar', +'tog-editondblclick', +'tog-editsection', +'tog-editsectiononrightclick', +'tog-showtoc', +'tog-rememberpassword', +'tog-editwidth', +'tog-watchdefault', +'tog-minordefault', +'tog-previewontop', +'tog-previewonfirst', +'tog-nocache', +'tog-enotifwatchlistpages', +'tog-enotifusertalkpages', +'tog-enotifminoredits', +'tog-enotifrevealaddr', +'tog-shownumberswatching', +'tog-rcusemodstyle', +'tog-showupdated', +'tog-fancysig', +'tog-externaleditor', + +'imagemaxsize', +'prefs-help-email', +'prefs-help-email-enotif', +'prefs-help-realname', +'prefs-help-userdata', +'prefs-misc', +'prefs-personal', +'prefs-rc', +'resetprefs', +'saveprefs', +'oldpassword', +'newpassword', +'retypenew', +'textboxsize', +'rows', +'columns', +'searchresultshead', +'resultsperpage', +'contextlines', +'contextchars', +'stubthreshold', +'recentchangescount', 
+'savedprefs', +'timezonelegend', +'timezonetext', +'localtime', +'timezoneoffset', +'servertime', +'guesstimezone', +'emailflag', +'defaultns', +'default', +); + +$SpecialRecentchangesMsg = array( +'changes', +'recentchanges', +'recentchanges-url', +'recentchangestext', +'rcloaderr', +'rcnote', +'rcnotefrom', +'rclistfrom', +'showhideminor', +'rclinks', +'rchide', +'rcliu', +'diff', +'hist', +'hide', +'show', +'tableform', +'listform', +'nchanges', +'minoreditletter', +'newpageletter', +'sectionlink', +'number_of_watching_users_RCview', +'number_of_watching_users_pageview', +'recentchangesall', +); + +$SpecialRecentchangeslinkedMsg = array( +'rclsub', +); + +$SpecialSearchMsg = array( +'searchresults', +'searchresulttext', +'searchquery', +'badquery', +'badquerytext', +'matchtotals', +'nogomatch', +'titlematches', +'notitlematches', +'textmatches', +'notextmatches', +); + +$SpecialSitesettingsMsg = array( +'sitesettings', +'sitesettings-features', +'sitesettings-permissions', +'sitesettings-memcached', +'sitesettings-debugging', +'sitesettings-caching', +'sitesettings-wgShowIPinHeader', +'sitesettings-wgUseDatabaseMessages', +'sitesettings-wgUseCategoryMagic', +'sitesettings-wgUseCategoryBrowser', +'sitesettings-wgHitcounterUpdateFreq', +'sitesettings-wgAllowExternalImages', +'sitesettings-permissions-readonly', +'sitesettings-permissions-whitelist', +'sitesettings-permissions-banning', +'sitesettings-permissions-miser', +'sitesettings-wgReadOnly', +'sitesettings-wgReadOnlyFile', +'sitesettings-wgWhitelistEdit', +'sitesettings-wgWhitelistRead', +'sitesettings-wgWhitelistAccount-user', +'sitesettings-wgWhitelistAccount-sysop', +'sitesettings-wgWhitelistAccount-developer', +'sitesettings-wgSysopUserBans', +'sitesettings-wgSysopRangeBans', +'sitesettings-wgDefaultBlockExpiry', +'sitesettings-wgMiserMode', +'sitesettings-wgDisableQueryPages', +'sitesettings-wgUseWatchlistCache', +'sitesettings-wgWLCacheTimeout', +'sitesettings-cookies', +'sitesettings-performance', 
+'sitesettings-images', +); + +$SpecialStatisticsMsg = array( +'statistics', +'sitestats', +'userstats', +'sitestatstext', +'userstatstext', +); + +$SpecialUndelte = array( +'deletepage', +); + +$SpecialUploadMsg = array( +'affirmation', +'badfilename', +'badfiletype', +'emptyfile', +'fileexists', +'filedesc', +'filename', +'filesource', +'filestatus', +'fileuploaded', +'ignorewarning', +'illegalfilename', +'largefile', +'minlength', +'noaffirmation', +'reupload', +'reuploaddesc', +'savefile', +'successfulupload', +'upload', +'uploadbtn', +'uploadcorrupt', +'uploaddisabled', +'uploadfile', +'uploadedimage', +'uploaderror', +'uploadlink', +'uploadlog', +'uploadlogpage', +'uploadlogpagetext', +'uploadnologin', +'uploadnologintext', +'uploadtext', +'uploadwarning', +); + +$SpecialUserlevelsMsg = array( +'saveusergroups', +'userlevels-editusergroup', +'userlevels-groupsavailable', +'userlevels-groupshelp', +'userlevels-groupsmember', +); + +$SpecialUserloginMsg = array( +'acct_creation_throttle_hit', +'loginend', +'loginsuccesstitle', +'loginsuccess', +'nocookiesnew', +'nocookieslogin', +'noemail', +'noname', +'nosuchuser', +'mailmypassword', +'mailmypasswordauthent', +'passwordremindermailsubject', +'passwordremindermailbody', +'passwordsent', +'passwordsentforemailauthentication', +'userexists', +'wrongpassword', +); + +$SpecialValidateMsg = array( +'val_yes', +'val_no', +'val_revision', +'val_time', +'val_list_header', +'val_add', +'val_del', +'val_warning', +'val_rev_for', +'val_rev_stats_link', +'val_iamsure', +'val_clear_old', +'val_merge_old', +'val_form_note', +'val_noop', +'val_percent', +'val_percent_single', +'val_total', +'val_version', +'val_tab', +'val_this_is_current_version', +'val_version_of', +'val_table_header', +'val_stat_link_text', +'val_view_version', +'val_validate_version', +'val_user_validations', +'val_no_anon_validation', +'val_validate_article_namespace_only', +'val_validated', +'val_article_lists', +'val_page_validation_statistics', +); + 
+$SpecialVersionMsg = array(
+'special_version_prefix',
+'special_version_postfix'
+);
+
+$SpecialWatchlistMsg = array(
+'watchlistall1',
+'watchlistall2',
+'wlnote',
+'wlshowlast',
+'wlsaved',
+'wlhideshowown',
+'wlshow',
+'wlhide',
+);
+
+$SpecialWhatlinkshereMsg = array(
+'linklistsub',
+'nolinkshere',
+'isredirect',
+);
+
+
+$commonMsg = array (
+'sunday',
+'monday',
+'tuesday',
+'wednesday',
+'thursday',
+'friday',
+'saturday',
+'january',
+'february',
+'march',
+'april',
+'may_long',
+'june',
+'july',
+'august',
+'september',
+'october',
+'november',
+'december',
+'jan',
+'feb',
+'mar',
+'apr',
+'may',
+'jun',
+'jul',
+'aug',
+'sep',
+'oct',
+'nov',
+'dec',
+'categories',
+'category',
+'linktrail',
+'mainpage',
+'portal',
+'portal-url',
+'about',
+'aboutsite',
+'aboutpage',
+'article',
+'help',
+'helppage',
+'wikititlesuffix',
+'bugreports',
+'bugreportspage',
+'sitesupport',
+'sitesupport-url',
+'faq',
+'faqpage',
+'edithelp',
+'newwindow',
+'edithelppage',
+'cancel',
+'qbfind',
+'qbbrowse',
+'qbedit',
+'qbpageoptions',
+'qbpageinfo',
+'qbmyoptions',
+'qbspecialpages',
+'moredotdotdot',
+'mypage',
+'mytalk',
+'anontalk',
+'navigation',
+'metadata',
+'metadata_page',
+'currentevents',
+'currentevents-url',
+'disclaimers',
+'disclaimerpage',
+'errorpagetitle',
+'returnto',
+'tagline',
+'whatlinkshere',
+'search',
+'go',
+'history',
+'history_short',
+'info_short',
+'printableversion',
+'edit',
+'editthispage',
+'newpage',
+'talkpage',
+'specialpage',
+'personaltools',
+'postcomment',
+'addsection',
+'articlepage',
+'subjectpage',
+'talk',
+'toolbox',
+'userpage',
+'wikipediapage',
+'imagepage',
+'viewtalkpage',
+'otherlanguages',
+'redirectedfrom',
+'lastmodified',
+'viewcount',
+'copyright',
+'poweredby',
+'printsubtitle',
+'protectedpage',
+'administrators',
+'sysoptitle',
+'sysoptext',
+'developertitle',
+'developertext',
+'bureaucrattitle',
+'bureaucrattext',
+'nbytes',
+'ok',
+'sitetitle',
+'pagetitle',
+'sitesubtitle',
+'retrievedfrom',
+'newmessages',
+'newmessageslink',
+'editsection',
+'toc',
+'showtoc',
+'hidetoc',
+'thisisdeleted',
+'restorelink',
+'feedlinks',
+'sitenotice',
+'nstab-main',
+'nstab-user',
+'nstab-media',
+'nstab-special',
+'nstab-wp',
+'nstab-image',
+'nstab-mediawiki',
+'nstab-template',
+'nstab-help',
+'nstab-category',
+'nosuchaction',
+'nosuchactiontext',
+
+
+'error',
+'databaseerror',
+'dberrortext',
+'dberrortextcl',
+'noconnect',
+'nodb',
+'cachederror',
+'laggedslavemode',
+'readonly',
+'enterlockreason',
+'readonlytext',
+'missingarticle',
+'internalerror',
+'filecopyerror',
+'filerenameerror',
+'filedeleteerror',
+'filenotfound',
+'unexpected',
+'formerror',
+'badarticleerror',
+'cannotdelete',
+'badtitle',
+'badtitletext',
+'perfdisabled',
+'perfdisabledsub',
+'perfcached',
+'wrong_wfQuery_params',
+'viewsource',
+'protectedtext',
+'seriousxhtmlerrors',
+'logouttitle',
+'logouttext',
+'welcomecreation',
+
+'loginpagetitle',
+'yourname',
+'yourpassword',
+'yourpasswordagain',
+'newusersonly',
+'remembermypassword',
+'loginproblem',
+'alreadyloggedin',
+'login',
+'loginprompt',
+'userlogin',
+'logout',
+'userlogout',
+'notloggedin',
+'createaccount',
+'createaccountmail',
+'badretype',
+
+'youremail',
+'yourrealname',
+'yourlanguage',
+'yourvariant',
+'yournick',
+'emailforlost',
+'loginerror',
+'nosuchusershort',
+
+'mailerror',
+'emailauthenticated',
+'emailnotauthenticated',
+'invalidemailaddress',
+'disableduntilauthent',
+'disablednoemail',
+
+'summary',
+'subject',
+'minoredit',
+'watchthis',
+'savearticle',
+'preview',
+'showpreview',
+'showdiff',
+'blockedtitle',
+'blockedtext',
+'whitelistedittitle',
+'whitelistedittext',
+'whitelistreadtitle',
+'whitelistreadtext',
+'whitelistacctitle',
+'whitelistacctext',
+'loginreqtitle',
+'loginreqtext',
+'accmailtitle',
+'accmailtext',
+'newarticle',
+'newarticletext',
+'talkpagetext',
+'anontalkpagetext',
+'noarticletext',
+'clearyourcache',
+'usercssjsyoucanpreview',
+'usercsspreview',
+'userjspreview',
+'updated',
+'note',
+'storedversion', // not used ? Editpage ?
+'revhistory',
+'nohistory',
+'revnotfound',
+'revnotfoundtext',
+'loadhist',
+'currentrev',
+'revisionasof',
+'revisionasofwithlink',
+'previousrevision',
+'nextrevision',
+'currentrevisionlink',
+'cur',
+'next',
+'last',
+'orig',
+'histlegend',
+'history_copyright',
+'difference',
+'loadingrev',
+'lineno',
+'editcurrent',
+'selectnewerversionfordiff',
+'selectolderversionfordiff',
+'compareselectedversions',
+
+'prevn',
+'nextn',
+'viewprevnext',
+'showingresults',
+'showingresultsnum',
+'nonefound',
+'powersearch',
+'powersearchtext',
+'searchdisabled',
+'googlesearch',
+'blanknamespace',
+'preferences',
+'prefsnologin',
+'prefsnologintext',
+'prefslogintext',
+'prefsreset',
+'qbsettings',
+'qbsettingsnote',
+'changepassword',
+'skin',
+'math',
+'dateformat',
+
+'math_failure',
+'math_unknown_error',
+'math_unknown_function',
+'math_lexing_error',
+'math_syntax_error',
+'math_image_error',
+'math_bad_tmpdir',
+'math_bad_output',
+'math_notexvc',
+
+
+
+
+
+
+'grouplevels-lookup-group',
+'grouplevels-group-edit',
+'editgroup',
+'addgroup',
+'userlevels-lookup-user',
+'userlevels-user-editname',
+'editusergroup',
+'grouplevels-editgroup',
+'grouplevels-addgroup',
+'grouplevels-editgroup-name',
+'grouplevels-editgroup-description',
+'savegroup',
+
+// common to several pages
+'copyrightpage',
+'copyrightpagename',
+'imagelist',
+'imagelisttext',
+'ilshowmatch',
+'ilsubmit',
+'showlast',
+'byname',
+'bydate',
+'bysize',
+
+
+
+'imgdelete',
+'imgdesc',
+'imglegend',
+'imghistory',
+'revertimg',
+'deleteimg',
+'deleteimgcompletely',
+'imghistlegend',
+'imagelinks',
+'linkstoimage',
+'nolinkstoimage',
+
+// unused ??
+'uploadedfiles',
+'getimagelist',
+
+
+'sharedupload',
+'shareduploadwiki',
+
+// Special pages names
+'orphans',
+'geo',
+'validate',
+'lonelypages',
+'uncategorizedpages',
+'uncategorizedcategories',
+'unusedimages',
+'popularpages',
+'nviews',
+'wantedpages',
+'nlinks',
+'allpages',
+'randompage',
+'randompage-url',
+'shortpages',
+'longpages',
+'deadendpages',
+'listusers',
+'specialpages',
+'spheading',
+'restrictedpheading',
+'recentchangeslinked',
+
+
+'debug',
+'newpages',
+'ancientpages',
+'intl',
+'unusedimagestext',
+'booksources',
+'categoriespagetext',
+'data',
+'userlevels',
+'grouplevels',
+'booksourcetext',
+'isbn',
+'rfcurl',
+'pubmedurl',
+'alphaindexline',
+'version',
+'log',
+'alllogstext',
+'nextpage',
+'mailnologin',
+'mailnologintext',
+'emailuser',
+'emailpage',
+'emailpagetext',
+'usermailererror',
+'defemailsubject',
+'noemailtitle',
+'noemailtext',
+'emailfrom',
+'emailto',
+'emailsubject',
+'emailmessage',
+'emailsend',
+'emailsent',
+'emailsenttext',
+'watchlist',
+'watchlistsub',
+'nowatchlist',
+'watchnologin',
+'watchnologintext',
+'addedwatch',
+'addedwatchtext',
+'removedwatch',
+'removedwatchtext',
+'watch',
+'watchthispage',
+'unwatch',
+'unwatchthispage',
+'notanarticle',
+'watchnochange',
+'watchdetails',
+'watchmethod-recent',
+'watchmethod-list',
+'removechecked',
+'watchlistcontains',
+'watcheditlist',
+'removingchecked',
+'couldntremove',
+'iteminvalidname',
+
+'updatedmarker',
+'email_notification_mailer',
+'email_notification_infotext',
+'email_notification_reset',
+'email_notification_newpagetext',
+'email_notification_to',
+'email_notification_subject',
+'email_notification_lastvisitedrevisiontext',
+'email_notification_body',
+
+'confirm',
+'excontent',
+'exbeforeblank',
+'exblank',
+'confirmdelete',
+'deletesub',
+'historywarning',
+'confirmdeletetext',
+'actioncomplete',
+'deletedtext',
+'deletedarticle',
+'dellogpage',
+'dellogpagetext',
+'deletionlog',
+'reverted',
+'deletecomment',
+'imagereverted',
+'rollback',
+'rollback_short',
+'rollbacklink',
+'rollbackfailed',
+'cantrollback',
+'alreadyrolled',
+'revertpage',
+'editcomment',
+'sessionfailure',
+
+'protectlogpage',
+'protectlogtext',
+
+'protectedarticle',
+'unprotectedarticle',
+
+'contributions',
+'mycontris',
+'notargettitle', // not used ?
+'notargettext', // not used ?
+
+'linkshere',
+
+'ipaddress',
+'ipadressorusername', // not used ?
+'ipbreason',
+
+'badipaddress',
+'noblockreason',
+'blockipsuccesssub',
+'blockipsuccesstext',
+'unblockip',
+'unblockiptext',
+'ipusubmit',
+'ipusuccess',
+'ipblocklist',
+'blocklistline',
+'blocklink',
+'unblocklink',
+'contribslink',
+'autoblocker',
+'blocklogpage',
+'blocklogentry',
+'blocklogtext',
+'unblocklogentry', // not used ?
+
+'proxyblocker',
+'proxyblockreason',
+'proxyblocksuccess',
+'sorbs',
+'sorbsreason',
+
+'setbureaucratflag',
+'bureaucratlog',
+'rightslogtext',
+'bureaucratlogentry',
+
+'articleexists', // not used ?
+
+'movedto',
+'1movedto2',
+'1movedto2_redir',
+'movelogpage',
+'movelogpagetext',
+
+'thumbnail-more',
+'missingimage',
+'filemissing',
+'Monobook.css',
+'nodublincore',
+'nocreativecommons',
+'notacceptable',
+
+// used in Article::
+'infosubtitle',
+'numedits',
+'numtalkedits',
+'numwatchers',
+'numauthors',
+'numtalkauthors',
+
+// not used ?
+'mw_math_png',
+'mw_math_simple',
+'mw_math_html',
+'mw_math_source',
+'mw_math_modern',
+'mw_math_mathml',
+
+// Patrolling
+'markaspatrolleddiff',
+'markaspatrolledlink',
+'markaspatrolledtext',
+'markedaspatrolled',
+'markedaspatrolledtext',
+'rcpatroldisabled', // not used ?
+'rcpatroldisabledtext', // not used ?
+
+'Monobook.js',
+'newimages',
+'noimages',
+'variantname-zh-cn',
+'variantname-zh-tw',
+'variantname-zh-hk',
+'variantname-zh-sg',
+'variantname-zh',
+'zhconversiontable',
+'passwordtooshort', // sp preferences / userlogin
+);
+?>
diff --git a/maintenance/splitLanguageFiles.php b/maintenance/splitLanguageFiles.php
new file mode 100644
index 00000000..b80f38fc
--- /dev/null
+++ b/maintenance/splitLanguageFiles.php
@@ -0,0 +1,13 @@
+<?php
+/**
+ * splitLanguageFiles
+ * Should read each of the languages files then split them in several subpart
+ * under ./languages/XX/ according to the arrays in splitLanguageFiles.inc .
+ *
+ * Also need to rewrite the wfMsg system / message-cache.
+ */
+
+include('commandLine.inc');
+
+
+?>
diff --git a/maintenance/stats.php b/maintenance/stats.php
new file mode 100644
index 00000000..8ebc3823
--- /dev/null
+++ b/maintenance/stats.php
@@ -0,0 +1,45 @@
+<?php
+/**
+ * Print the request, parser cache, image cache and diff cache counters that
+ * are read from $wgMemc under the "$wgDBname:stats:*" keys, each with its
+ * share of the section total.
+ */
+require_once('commandLine.inc');
+
+/**
+ * Share of $count in $total, as a percentage.
+ *
+ * Returns 0 when $total is 0, so a section whose counters are all unset
+ * prints 0.00% instead of dividing by zero.
+ *
+ * @param int $count
+ * @param int $total
+ * @return float|int
+ */
+function statsPercent( $count, $total ) {
+	return $total ? $count / $total * 100 : 0;
+}
+
+print "Requests\n";
+$session = intval($wgMemc->get("$wgDBname:stats:request_with_session"));
+$noSession = intval($wgMemc->get("$wgDBname:stats:request_without_session"));
+$total = $session + $noSession;
+printf( "with session: %-10d %6.2f%%\n", $session, statsPercent( $session, $total ) );
+printf( "without session: %-10d %6.2f%%\n", $noSession, statsPercent( $noSession, $total ) );
+printf( "total: %-10d %6.2f%%\n", $total, 100 );
+
+
+print "\nParser cache\n";
+$hits = intval($wgMemc->get("$wgDBname:stats:pcache_hit"));
+$invalid = intval($wgMemc->get("$wgDBname:stats:pcache_miss_invalid"));
+$expired = intval($wgMemc->get("$wgDBname:stats:pcache_miss_expired"));
+$absent = intval($wgMemc->get("$wgDBname:stats:pcache_miss_absent"));
+$stub = intval($wgMemc->get("$wgDBname:stats:pcache_miss_stub"));
+$total = $hits + $invalid + $expired + $absent + $stub;
+printf( "hits: %-10d %6.2f%%\n", $hits, statsPercent( $hits, $total ) );
+printf( "invalid: %-10d %6.2f%%\n", $invalid, statsPercent( $invalid, $total ) );
+printf( "expired: %-10d %6.2f%%\n", $expired, statsPercent( $expired, $total ) );
+printf( "absent: %-10d %6.2f%%\n", $absent, statsPercent( $absent, $total ) );
+printf( "stub threshold: %-10d %6.2f%%\n", $stub, statsPercent( $stub, $total ) );
+printf( "total: %-10d %6.2f%%\n", $total, 100 );
+
+$hits = intval($wgMemc->get("$wgDBname:stats:image_cache_hit"));
+$misses = intval($wgMemc->get("$wgDBname:stats:image_cache_miss"));
+$updates = intval($wgMemc->get("$wgDBname:stats:image_cache_update"));
+// Updates are reported as a bare count; they are not part of the hit/miss total
+$total = $hits + $misses;
+print("\nImage cache\n");
+printf( "hits: %-10d %6.2f%%\n", $hits, statsPercent( $hits, $total ) );
+printf( "misses: %-10d %6.2f%%\n", $misses, statsPercent( $misses, $total ) );
+printf( "updates: %-10d\n", $updates );
+
+$hits = intval($wgMemc->get("$wgDBname:stats:diff_cache_hit"));
+$misses = intval($wgMemc->get("$wgDBname:stats:diff_cache_miss"));
+$uncacheable = intval($wgMemc->get("$wgDBname:stats:diff_uncacheable"));
+$total = $hits + $misses + $uncacheable;
+print("\nDiff cache\n");
+printf( "hits: %-10d %6.2f%%\n", $hits, statsPercent( $hits, $total ) );
+printf( "misses: %-10d %6.2f%%\n", $misses, statsPercent( $misses, $total ) );
+printf( "uncacheable: %-10d %6.2f%%\n", $uncacheable, statsPercent( $uncacheable, $total ) );
+
+?>
diff --git a/maintenance/storage/blobs.sql b/maintenance/storage/blobs.sql
new file mode 100644
index 00000000..5782ac47
--- /dev/null
+++ b/maintenance/storage/blobs.sql
@@ -0,0 +1,8 @@
+-- Blobs table for external storage
+
+CREATE TABLE /*$wgDBprefix*/blobs (
+  blob_id int(8) NOT NULL AUTO_INCREMENT,
+  blob_text mediumtext,
+  PRIMARY KEY (blob_id)
+) TYPE=InnoDB;
+
diff --git a/maintenance/storage/checkStorage.php b/maintenance/storage/checkStorage.php
new file mode 100644
index 00000000..a83d2744
--- /dev/null
+++ b/maintenance/storage/checkStorage.php
@@ -0,0 +1,468 @@
+<?php
+
+/**
+ * Fsck for MediaWiki
+ */
+
+// Serialized-object prefix that every concatenated gzip history blob must start with
+define( 'CONCAT_HEADER', 'O:27:"concatenatedgziphistoryblob"' );
+
+// Run the check straight away when MEDIAWIKI is not defined yet, i.e. when this
+// file is the entry point rather than being included by already-initialised code.
+// NOTE(review): $options and $args are presumably populated by commandLine.inc.
+if ( !defined( 'MEDIAWIKI' ) ) {
+	require_once( dirname(__FILE__) . '/../commandLine.inc' );
+	require_once( 'ExternalStore.php' );
+	require_once( 'ExternalStoreDB.php' );
+	require_once( 'SpecialImport.php' );
+
+	// --fix enables repairs; the first positional argument, if any, names the XML dump
+	$fix = isset( $options['fix'] );
+	$xml = isset( $args[0] ) ? $args[0] : false;
+
+	$cs = new CheckStorage;
+	$cs->check( $fix, $xml );
+}
+
+
+//----------------------------------------------------------------------------------
+
+/**
+ * Consistency checker for revision text storage.
+ *
+ * check() walks the revision table, validates the text rows, external storage
+ * URLs and serialized history blob objects behind each revision, prints every
+ * problem found and can optionally repair some of them (--fix), including
+ * restoring damaged text from an XML dump.
+ */
+class CheckStorage
+{
+	/** Map of rev_id => rev_text_id for the chunk being checked; rebuilt per chunk by check(), read by error() */
+	var $oldIdMap;
+	/** Error category => array keyed by the rev_id of each affected revision */
+	var $errors;
+	/** ExternalStoreDB instance, created on first use and shared by the external storage checks */
+	var $dbStore = null;
+
+	/** Description printed for each error category in the final summary */
+	var $errorDescriptions = array(
+		'restore text' => 'Damaged text, need to be restored from a backup',
+		'restore revision' => 'Damaged revision row, need to be restored from a backup',
+		'unfixable' => 'Unexpected errors with no automated fixing method',
+		'fixed' => 'Errors already fixed',
+		'fixable' => 'Errors which would already be fixed if --fix was specified',
+	);
+
+	/**
+	 * Walk the revision table in chunks of rev_id and validate the text row behind
+	 * every revision: the old_flags value, external storage URLs and their blob
+	 * targets, serialized local objects, and the concat blobs that stubs point to.
+	 * Problems are printed as they are found and summarised per category at the
+	 * end, followed by flag and local object class statistics.
+	 *
+	 * @param bool $fix Repair what can be repaired: reset old_flags '0' to '' and,
+	 *   when $xml is given, restore damaged text through restoreText()
+	 * @param string|bool $xml XML dump to restore text from; '' or false for none
+	 */
+	function check( $fix = false, $xml = '' ) {
+		$fname = 'checkStorage';
+		$dbr =& wfGetDB( DB_SLAVE );
+		if ( $fix ) {
+			$dbw =& wfGetDB( DB_MASTER );
+			print "Checking, will fix errors if possible...\n";
+		} else {
+			print "Checking...\n";
+		}
+		$maxRevId = $dbr->selectField( 'revision', 'MAX(rev_id)', false, $fname );
+		$chunkSize = 1000;
+		$flagStats = array();
+		$objectStats = array();
+		$knownFlags = array( 'external', 'gzip', 'object', 'utf-8' );
+		$this->errors = array(
+			'restore text' => array(),
+			'restore revision' => array(),
+			'unfixable' => array(),
+			'fixed' => array(),
+			'fixable' => array(),
+		);
+
+		// <= rather than <: with < the chunk starting exactly at $maxRevId was never
+		// visited, so the newest revision went unchecked (e.g. max rev_id 1 or 1001)
+		for ( $chunkStart = 1 ; $chunkStart <= $maxRevId; $chunkStart += $chunkSize ) {
+			$chunkEnd = $chunkStart + $chunkSize - 1;
+			//print "$chunkStart of $maxRevId\n";
+
+			// Fetch revision rows
+			$this->oldIdMap = array();
+			$dbr->ping();
+			$res = $dbr->select( 'revision', array( 'rev_id', 'rev_text_id' ),
+				array( "rev_id BETWEEN $chunkStart AND $chunkEnd" ), $fname );
+			while ( $row = $dbr->fetchObject( $res ) ) {
+				$this->oldIdMap[$row->rev_id] = $row->rev_text_id;
+			}
+			$dbr->freeResult( $res );
+
+			if ( !count( $this->oldIdMap ) ) {
+				continue;
+			}
+
+			// Fetch old_flags
+			// Start with every referenced text row marked missing (old_id => rev_id)
+			// and remove the ones the query below actually returns
+			$missingTextRows = array_flip( $this->oldIdMap );
+			$externalRevs = array();
+			$objectRevs = array();
+			$res = $dbr->select( 'text', array( 'old_id', 'old_flags' ),
+				'old_id IN (' . implode( ',', $this->oldIdMap ) . ')', $fname );
+			while ( $row = $dbr->fetchObject( $res ) ) {
+				$flags = $row->old_flags;
+				$id = $row->old_id;
+
+				// Create flagStats row if it doesn't exist
+				if ( !isset( $flagStats[$flags] ) ) {
+					$flagStats[$flags] = 0;
+				}
+				// Increment counter
+				$flagStats[$flags]++;
+
+				// Not missing
+				unset( $missingTextRows[$row->old_id] );
+
+				// Check for external or object
+				if ( $flags == '' ) {
+					$flagArray = array();
+				} else {
+					$flagArray = explode( ',', $flags );
+				}
+				if ( in_array( 'external', $flagArray ) ) {
+					$externalRevs[] = $id;
+				} elseif ( in_array( 'object', $flagArray ) ) {
+					$objectRevs[] = $id;
+				}
+
+				// Check for unrecognised flags
+				if ( $flags == '0' ) {
+					// This is a known bug from 2004
+					// It's safe to just erase the old_flags field
+					if ( $fix ) {
+						$this->error( 'fixed', "Warning: old_flags set to 0", $id );
+						$dbw->ping();
+						$dbw->update( 'text', array( 'old_flags' => '' ),
+							array( 'old_id' => $id ), $fname );
+						echo "Fixed\n";
+					} else {
+						$this->error( 'fixable', "Warning: old_flags set to 0", $id );
+					}
+				} elseif ( count( array_diff( $flagArray, $knownFlags ) ) ) {
+					$this->error( 'unfixable', "Error: invalid flags field \"$flags\"", $id );
+				}
+			}
+			$dbr->freeResult( $res );
+
+			// Output errors for any missing text rows
+			foreach ( $missingTextRows as $oldId => $revId ) {
+				$this->error( 'restore revision', "Error: missing text row", $oldId );
+			}
+
+			// Verify external revisions
+			// Both maps are cluster => blob id => list of old_ids referencing that blob
+			$externalConcatBlobs = array();
+			$externalNormalBlobs = array();
+			if ( count( $externalRevs ) ) {
+				$res = $dbr->select( 'text', array( 'old_id', 'old_flags', 'old_text' ),
+					array( 'old_id IN (' . implode( ',', $externalRevs ) . ')' ), $fname );
+				while ( $row = $dbr->fetchObject( $res ) ) {
+					$urlParts = explode( '://', $row->old_text, 2 );
+					if ( count( $urlParts ) !== 2 || $urlParts[1] == '' ) {
+						$this->error( 'restore text', "Error: invalid URL \"{$row->old_text}\"", $row->old_id );
+						continue;
+					}
+					list( $proto, $path ) = $urlParts;
+					if ( $proto != 'DB' ) {
+						$this->error( 'restore text', "Error: invalid external protocol \"$proto\"", $row->old_id );
+						continue;
+					}
+					// Splitting the whole URL on '/' yields 'DB:', '', cluster, id and,
+					// for a two-part URL, one more element
+					$path = explode( '/', $row->old_text );
+					$cluster = $path[2];
+					$id = $path[3];
+					if ( isset( $path[4] ) ) {
+						$externalConcatBlobs[$cluster][$id][] = $row->old_id;
+					} else {
+						$externalNormalBlobs[$cluster][$id][] = $row->old_id;
+					}
+				}
+				$dbr->freeResult( $res );
+			}
+
+			// Check external concat blobs for the right header
+			$this->checkExternalConcatBlobs( $externalConcatBlobs );
+
+			// Check external normal blobs for existence
+			if ( count( $externalNormalBlobs ) ) {
+				if ( is_null( $this->dbStore ) ) {
+					$this->dbStore = new ExternalStoreDB;
+				}
+				// Iterate the normal (one-part) blobs here; the loop used to walk
+				// $externalConcatBlobs, so one-part URLs were never verified
+				foreach ( $externalNormalBlobs as $cluster => $xBlobIds ) {
+					$blobIds = array_keys( $xBlobIds );
+					$extDb =& $this->dbStore->getSlave( $cluster );
+					$blobsTable = $this->dbStore->getTable( $extDb );
+					$res = $extDb->select( $blobsTable,
+						array( 'blob_id' ),
+						array( 'blob_id IN( ' . implode( ',', $blobIds ) . ')' ), $fname );
+					while ( $row = $extDb->fetchObject( $res ) ) {
+						unset( $xBlobIds[$row->blob_id] );
+					}
+					$extDb->freeResult( $res );
+					// Print errors for missing blobs rows
+					foreach ( $xBlobIds as $blobId => $referringOldIds ) {
+						$this->error( 'restore text', "Error: missing target $blobId for one-part ES URL", $referringOldIds );
+					}
+				}
+			}
+
+			// Check local objects
+			$dbr->ping();
+			$concatBlobs = array();
+			// NOTE(review): $curIds is filled below but never read in this class
+			$curIds = array();
+			if ( count( $objectRevs ) ) {
+				$headerLength = 300;
+				$res = $dbr->select( 'text', array( 'old_id', 'old_flags', "LEFT(old_text, $headerLength) AS header" ),
+					array( 'old_id IN (' . implode( ',', $objectRevs ) . ')' ), $fname );
+				while ( $row = $dbr->fetchObject( $res ) ) {
+					$oldId = $row->old_id;
+					if ( !preg_match( '/^O:(\d+):"(\w+)"/', $row->header, $matches ) ) {
+						$this->error( 'restore text', "Error: invalid object header", $oldId );
+						continue;
+					}
+
+					$className = strtolower( $matches[2] );
+					if ( strlen( $className ) != $matches[1] ) {
+						$this->error( 'restore text', "Error: invalid object header, wrong class name length", $oldId );
+						continue;
+					}
+
+					if ( !isset( $objectStats[$className] ) ) {
+						$objectStats[$className] = 0;
+					}
+					$objectStats[$className]++;
+
+					// The switch is the last statement of the loop body, so leaving it
+					// with break moves on to the next row. The original used a bare
+					// "continue", which PHP treats as "break" inside a switch.
+					switch ( $className ) {
+						case 'concatenatedgziphistoryblob':
+							// Good
+							break;
+						case 'historyblobstub':
+						case 'historyblobcurstub':
+							if ( strlen( $row->header ) == $headerLength ) {
+								// The header filled the whole LEFT() window, so it may be truncated
+								$this->error( 'unfixable', "Error: overlong stub header", $oldId );
+								break;
+							}
+							$stubObj = unserialize( $row->header );
+							if ( !is_object( $stubObj ) ) {
+								$this->error( 'restore text', "Error: unable to unserialize stub object", $oldId );
+								break;
+							}
+							if ( $className == 'historyblobstub' ) {
+								$concatBlobs[$stubObj->mOldId][] = $oldId;
+							} else {
+								$curIds[$stubObj->mCurId][] = $oldId;
+							}
+							break;
+						default:
+							$this->error( 'unfixable', "Error: unrecognised object class \"$className\"", $oldId );
+					}
+				}
+				$dbr->freeResult( $res );
+			}
+
+			// Check local concat blob validity
+			$externalConcatBlobs = array();
+			if ( count( $concatBlobs ) ) {
+				$headerLength = 300;
+				$res = $dbr->select( 'text', array( 'old_id', 'old_flags', "LEFT(old_text, $headerLength) AS header" ),
+					array( 'old_id IN (' . implode( ',', array_keys( $concatBlobs ) ) . ')' ), $fname );
+				while ( $row = $dbr->fetchObject( $res ) ) {
+					$flags = explode( ',', $row->old_flags );
+					if ( in_array( 'external', $flags ) ) {
+						// Concat blob is in external storage?
+						if ( in_array( 'object', $flags ) ) {
+							$urlParts = explode( '/', $row->header );
+							if ( $urlParts[0] != 'DB:' ) {
+								$this->error( 'unfixable', "Error: unrecognised external storage type \"{$urlParts[0]}\"", $row->old_id );
+							} else {
+								$cluster = $urlParts[2];
+								$id = $urlParts[3];
+								if ( !isset( $externalConcatBlobs[$cluster][$id] ) ) {
+									$externalConcatBlobs[$cluster][$id] = array();
+								}
+								$externalConcatBlobs[$cluster][$id] = array_merge(
+									$externalConcatBlobs[$cluster][$id], $concatBlobs[$row->old_id]
+								);
+							}
+						} else {
+							$this->error( 'unfixable', "Error: invalid flags \"{$row->old_flags}\" on concat bulk row {$row->old_id}",
+								$concatBlobs[$row->old_id] );
+						}
+					} elseif ( strcasecmp( substr( $row->header, 0, strlen( CONCAT_HEADER ) ), CONCAT_HEADER ) ) {
+						$this->error( 'restore text', "Error: Incorrect object header for concat bulk row {$row->old_id}",
+							$concatBlobs[$row->old_id] );
+					} # else good
+
+					unset( $concatBlobs[$row->old_id] );
+				}
+				$dbr->freeResult( $res );
+			}
+
+			// Check targets of unresolved stubs
+			$this->checkExternalConcatBlobs( $externalConcatBlobs );
+
+			// next chunk
+		}
+
+		print "\n\nErrors:\n";
+		foreach( $this->errors as $name => $errors ) {
+			if ( count( $errors ) ) {
+				$description = $this->errorDescriptions[$name];
+				echo "$description: " . implode( ',', array_keys( $errors ) ) . "\n";
+			}
+		}
+
+		if ( count( $this->errors['restore text'] ) && $fix ) {
+			if ( (string)$xml !== '' ) {
+				$this->restoreText( array_keys( $this->errors['restore text'] ), $xml );
+			} else {
+				echo "Can't fix text, no XML backup specified\n";
+			}
+		}
+
+		print "\nFlag statistics:\n";
+		$total = array_sum( $flagStats );
+		foreach ( $flagStats as $flag => $count ) {
+			printf( "%-30s %10d %5.2f%%\n", $flag, $count, $count / $total * 100 );
+		}
+		print "\nLocal object statistics:\n";
+		$total = array_sum( $objectStats );
+		foreach ( $objectStats as $className => $count ) {
+			printf( "%-30s %10d %5.2f%%\n", $className, $count, $count / $total * 100 );
+		}
+	}
+
+
+	/**
+	 * Print an error message and record the affected revisions under a category.
+	 *
+	 * @param string $type Key of $this->errors to record the revisions under
+	 * @param string $msg Message to print; the affected ids are appended to it
+	 * @param mixed $ids A text row id (old_id) or an array of them; each is mapped
+	 *   back to the rev_ids that reference it through $this->oldIdMap
+	 */
+	function error( $type, $msg, $ids ) {
+		// A one-element array is reported like a plain single id
+		if ( is_array( $ids ) && count( $ids ) == 1 ) {
+			$ids = reset( $ids );
+		}
+		if ( is_array( $ids ) ) {
+			$revIds = array();
+			foreach ( $ids as $id ) {
+				$revIds = array_merge( $revIds, array_keys( $this->oldIdMap, $id ) );
+			}
+			print "$msg in text rows " . implode( ', ', $ids ) .
+				", revisions " . implode( ', ', $revIds ) . "\n";
+		} else {
+			$id = $ids;
+			$revIds = array_keys( $this->oldIdMap, $id );
+			if ( count( $revIds ) == 1 ) {
+				print "$msg in old_id $id, rev_id {$revIds[0]}\n";
+			} else {
+				print "$msg in old_id $id, revisions " . implode( ', ', $revIds ) . "\n";
+			}
+		}
+		// Union keyed by rev_id, so a revision is listed once per category
+		$this->errors[$type] = $this->errors[$type] + array_flip( $revIds );
+	}
+
+	/**
+	 * Verify that the external blobs referenced by two-part ES URLs exist and
+	 * start with CONCAT_HEADER; report a 'restore text' error otherwise.
+	 *
+	 * @param array $externalConcatBlobs cluster => blob id => list of old_ids
+	 *   referencing that blob
+	 */
+	function checkExternalConcatBlobs( $externalConcatBlobs ) {
+		$fname = 'CheckStorage::checkExternalConcatBlobs';
+		if ( !count( $externalConcatBlobs ) ) {
+			return;
+		}
+
+		if ( is_null( $this->dbStore ) ) {
+			$this->dbStore = new ExternalStoreDB;
+		}
+
+		$headerLength = strlen( CONCAT_HEADER );
+		foreach ( $externalConcatBlobs as $cluster => $oldIds ) {
+			$blobIds = array_keys( $oldIds );
+			$extDb =& $this->dbStore->getSlave( $cluster );
+			$blobsTable = $this->dbStore->getTable( $extDb );
+			$res = $extDb->select( $blobsTable,
+				array( 'blob_id', "LEFT(blob_text, $headerLength) AS header" ),
+				array( 'blob_id IN( ' . implode( ',', $blobIds ) . ')' ), $fname );
+			while ( $row = $extDb->fetchObject( $res ) ) {
+				if ( strcasecmp( $row->header, CONCAT_HEADER ) ) {
+					$this->error( 'restore text', "Error: invalid header on target $cluster/{$row->blob_id} of two-part ES URL",
+						$oldIds[$row->blob_id] );
+				}
+				// Found, so not missing
+				unset( $oldIds[$row->blob_id] );
+
+			}
+			$extDb->freeResult( $res );
+
+			// Print errors for missing blobs rows
+			// (a separate value variable: the loop used to overwrite $oldIds while iterating it)
+			foreach ( $oldIds as $blobId => $referringOldIds ) {
+				$this->error( 'restore text', "Error: missing target $cluster/$blobId for two-part ES URL", $referringOldIds );
+			}
+		}
+	}
+
+	/**
+	 * Restore the text of the given revisions from an XML dump: write the
+	 * revision list to a temporary file, filter the dump with mwdumper and feed
+	 * the filtered dump to WikiImporter with importRevision() as the callback.
+	 *
+	 * @param array $revIds rev_ids whose text should be restored
+	 * @param string $xml XML dump passed to mwdumper as its input
+	 */
+	function restoreText( $revIds, $xml ) {
+		global $wgTmpDirectory, $wgDBname;
+
+		if ( !count( $revIds ) ) {
+			return;
+		}
+
+		print "Restoring text from XML backup...\n";
+
+		$revFileName = "$wgTmpDirectory/broken-revlist-$wgDBname";
+		$filteredXmlFileName = "$wgTmpDirectory/filtered-$wgDBname.xml";
+
+		// Write revision list
+		if ( !file_put_contents( $revFileName, implode( "\n", $revIds ) ) ) {
+			echo "Error writing revision list, can't restore text\n";
+			return;
+		}
+
+		// Run mwdumper
+		echo "Filtering XML dump...\n";
+		$exitStatus = 0;
+		passthru( 'mwdumper ' .
+			wfEscapeShellArg(
+				"--output=file:$filteredXmlFileName",
+				"--filter=revlist:$revFileName",
+				$xml
+			), $exitStatus
+		);
+
+		if ( $exitStatus ) {
+			echo "mwdumper died with exit status $exitStatus\n";
+			return;
+		}
+
+		$file = fopen( $filteredXmlFileName, 'r' );
+		if ( !$file ) {
+			echo "Unable to open filtered XML file\n";
+			return;
+		}
+
+		// Ping both connections before the import starts using them again
+		$dbr =& wfGetDB( DB_SLAVE );
+		$dbw =& wfGetDB( DB_MASTER );
+		$dbr->ping();
+		$dbw->ping();
+
+		$source = new ImportStreamSource( $file );
+		$importer = new WikiImporter( $source );
+		$importer->setRevisionCallback( array( &$this, 'importRevision' ) );
+		$importer->doImport();
+
+		// The import has finished reading the dump; release the handle
+		fclose( $file );
+	}
+
+	/**
+	 * WikiImporter revision callback: overwrite the text row of the revision
+	 * with the text from the dump and move the revision from the
+	 * 'restore text' list to the 'fixed' list.
+	 *
+	 * @param object $revision Imported revision; getID() and getText() are used
+	 * @param object $importer The calling importer (unused)
+	 */
+	function importRevision( &$revision, &$importer ) {
+		$fname = 'CheckStorage::importRevision';
+
+		$id = $revision->getID();
+		$text = $revision->getText();
+		if ( $text === '' ) {
+			// This is what happens if the revision was broken at the time the
+			// dump was made. Unfortunately, it also happens if the revision was
+			// legitimately blank, so there's no way to tell the difference. To
+			// be safe, we'll skip it and leave it broken
+			$id = $id ? $id : '';
+			echo "Revision $id is blank in the dump, may have been broken before export\n";
+			return;
+		}
+
+		if ( !$id ) {
+			// No ID, can't import
+			echo "No id tag in revision, can't import\n";
+			return;
+		}
+
+		// Find text row again
+		$dbr =& wfGetDB( DB_SLAVE );
+		$oldId = $dbr->selectField( 'revision', 'rev_text_id', array( 'rev_id' => $id ), $fname );
+		if ( !$oldId ) {
+			echo "Missing revision row for rev_id $id\n";
+			return;
+		}
+
+		// Compress the text
+		// compressRevisionText() is expected to modify $text in place and return the flags
+		$flags = Revision::compressRevisionText( $text );
+
+		// Update the text row
+		// The master connection was never fetched here before, so the update
+		// below failed on an undefined $dbw
+		$dbw =& wfGetDB( DB_MASTER );
+		$dbw->update( 'text',
+			array( 'old_flags' => $flags, 'old_text' => $text ),
+			array( 'old_id' => $oldId ),
+			$fname, array( 'LIMIT' => 1 )
+		);
+
+		// Remove it from the unfixed list and add it to the fixed list
+		unset( $this->errors['restore text'][$id] );
+		$this->errors['fixed'][$id] = true;
+	}
+}
+?>
diff --git a/maintenance/storage/compressOld.inc b/maintenance/storage/compressOld.inc new file mode 100644 index 00000000..b7d7094f --- /dev/null +++ b/maintenance/storage/compressOld.inc @@ -0,0 +1,300 @@ +<?php +/** + * @package MediaWiki + * @subpackage Maintenance + */ + +/** */ +require_once( 'Revision.php' ); +require_once( 'ExternalStoreDB.php' ); + +/** @todo document */ +function compressOldPages( $start = 0, $extdb = '' ) { + $fname = 'compressOldPages'; + + $chunksize = 50; + print "Starting from old_id $start...\n"; + $dbw =& wfGetDB( DB_MASTER ); + do { + $end = $start + $chunksize; + $res = $dbw->select( 'text', array( 'old_id','old_flags','old_namespace','old_title','old_text' ), + "old_id>=$start", $fname, array( 'ORDER BY' => 'old_id', 'LIMIT' => $chunksize, 'FOR UPDATE' ) ); + if( $dbw->numRows( $res ) == 0 ) { + break; + } + $last = $start; + while( $row = $dbw->fetchObject( $res ) ) { + # print " {$row->old_id} - {$row->old_namespace}:{$row->old_title}\n"; + compressPage( $row, $extdb ); + $last = $row->old_id; + } + $dbw->freeResult( $res ); + $start = $last + 1; # Deletion may leave long empty stretches + print "$start...\n"; + } while( true ); +} + +/** @todo document */ +function compressPage( $row, $extdb ) { + $fname = 'compressPage'; + if ( false !== strpos( $row->old_flags, 'gzip' ) || false !== strpos( $row->old_flags, 'object' ) ) { + #print "Already compressed row {$row->old_id}\n"; + return false; + } + $dbw =& wfGetDB( DB_MASTER ); + $flags = $row->old_flags ? 
"{$row->old_flags},gzip" : "gzip"; + $compress = gzdeflate( $row->old_text ); + + # Store in external storage if required + if ( $extdb !== '' ) { + $storeObj = new ExternalStoreDB; + $compress = $storeObj->store( $extdb, $compress ); + if ( $compress === false ) { + print "Unable to store object\n"; + return false; + } + } + + # Update text row + $dbw->update( 'text', + array( /* SET */ + 'old_flags' => $flags, + 'old_text' => $compress + ), array( /* WHERE */ + 'old_id' => $row->old_id + ), $fname, 'LIMIT 1' + ); + return true; +} + +define( 'LS_INDIVIDUAL', 0 ); +define( 'LS_CHUNKED', 1 ); + +/** @todo document */ +function compressWithConcat( $startId, $maxChunkSize, $maxChunkFactor, $factorThreshold, $beginDate, + $endDate, $extdb="", $maxPageId = false ) +{ + $fname = 'compressWithConcat'; + $loadStyle = LS_CHUNKED; + + $dbr =& wfGetDB( DB_SLAVE ); + $dbw =& wfGetDB( DB_MASTER ); + + # Set up external storage + if ( $extdb != '' ) { + $storeObj = new ExternalStoreDB; + } + + # Get all articles by page_id + if ( !$maxPageId ) { + $maxPageId = $dbr->selectField( 'page', 'max(page_id)', '', $fname ); + } + print "Starting from $startId of $maxPageId\n"; + $pageConds = array(); + + /* + if ( $exclude_ns0 ) { + print "Excluding main namespace\n"; + $pageConds[] = 'page_namespace<>0'; + } + if ( $queryExtra ) { + $pageConds[] = $queryExtra; + } + */ + + # For each article, get a list of revisions which fit the criteria + + # No recompression, use a condition on old_flags + # Don't compress object type entities, because that might produce data loss when + # overwriting bulk storage concat rows. Don't compress external references, because + # the script doesn't yet delete rows from external storage. + $conds = array( + "old_flags NOT LIKE '%object%' AND old_flags NOT LIKE '%external%'"); + + if ( $beginDate ) { + $conds[] = "rev_timestamp>'" . $beginDate . "'"; + } + if ( $endDate ) { + $conds[] = "rev_timestamp<'" . $endDate . 
"'"; + } + if ( $loadStyle == LS_CHUNKED ) { + $tables = array( 'revision', 'text' ); + $fields = array( 'rev_id', 'rev_text_id', 'old_flags', 'old_text' ); + $conds[] = 'rev_text_id=old_id'; + $revLoadOptions = 'FOR UPDATE'; + } else { + $tables = array( 'revision' ); + $fields = array( 'rev_id', 'rev_text_id' ); + $revLoadOptions = array(); + } + + # Don't work with current revisions + # Don't lock the page table for update either -- TS 2006-04-04 + #$tables[] = 'page'; + #$conds[] = 'page_id=rev_page AND rev_id != page_latest'; + + $oldReadsSinceLastSlaveWait = 0; #check slave lag periodically + $totalMatchingRevisions = 0; + $masterPos = false; + for ( $pageId = $startId; $pageId <= $maxPageId; $pageId++ ) { + wfWaitForSlaves( 5 ); + + # Wake up + $dbr->ping(); + + # Get the page row + $pageRes = $dbr->select( 'page', + array('page_id', 'page_namespace', 'page_title','page_latest'), + $pageConds + array('page_id' => $pageId), $fname ); + if ( $dbr->numRows( $pageRes ) == 0 ) { + continue; + } + $pageRow = $dbr->fetchObject( $pageRes ); + + # Display progress + $titleObj = Title::makeTitle( $pageRow->page_namespace, $pageRow->page_title ); + print "$pageId\t" . $titleObj->getPrefixedDBkey() . " "; + + # Load revisions + $revRes = $dbw->select( $tables, $fields, + array( + 'rev_page' => $pageRow->page_id, + # Don't operate on the current revision + # Use < instead of <> in case the current revision has changed + # since the page select, which wasn't locking + 'rev_id < ' . 
$pageRow->page_latest + ) + $conds, + $fname, + $revLoadOptions + ); + $revs = array(); + while ( $revRow = $dbw->fetchObject( $revRes ) ) { + $revs[] = $revRow; + } + + if ( count( $revs ) < 2) { + # No revisions matching, no further processing + print "\n"; + continue; + } + + # For each chunk + $i = 0; + while ( $i < count( $revs ) ) { + if ( $i < count( $revs ) - $maxChunkSize ) { + $thisChunkSize = $maxChunkSize; + } else { + $thisChunkSize = count( $revs ) - $i; + } + + $chunk = new ConcatenatedGzipHistoryBlob(); + $stubs = array(); + $dbw->begin(); + $usedChunk = false; + $primaryOldid = $revs[$i]->rev_text_id; + + # Get the text of each revision and add it to the object + for ( $j = 0; $j < $thisChunkSize && $chunk->isHappy( $maxChunkFactor, $factorThreshold ); $j++ ) { + $oldid = $revs[$i + $j]->rev_text_id; + + # Get text + if ( $loadStyle == LS_INDIVIDUAL ) { + $textRow = $dbw->selectRow( 'text', + array( 'old_flags', 'old_text' ), + array( 'old_id' => $oldid ), + $fname, + 'FOR UPDATE' + ); + $text = Revision::getRevisionText( $textRow ); + } else { + $text = Revision::getRevisionText( $revs[$i + $j] ); + } + + if ( $text === false ) { + print "\nError, unable to get text in old_id $oldid\n"; + #$dbw->delete( 'old', array( 'old_id' => $oldid ) ); + } + + if ( $extdb == "" && $j == 0 ) { + $chunk->setText( $text ); + print '.'; + } else { + # Don't make a stub if it's going to be longer than the article + # Stubs are typically about 100 bytes + if ( strlen( $text ) < 120 ) { + $stub = false; + print 'x'; + } else { + $stub = $chunk->addItem( $text ); + $stub->setLocation( $primaryOldid ); + $stub->setReferrer( $oldid ); + print '.'; + $usedChunk = true; + } + $stubs[$j] = $stub; + } + } + $thisChunkSize = $j; + + # If we couldn't actually use any stubs because the pages were too small, do nothing + if ( $usedChunk ) { + if ( $extdb != "" ) { + # Move blob objects to External Storage + $stored = $storeObj->store( $extdb, serialize( $chunk )); + if 
($stored === false) { + print "Unable to store object\n"; + return false; + } + # Store External Storage URLs instead of Stub placeholders + foreach ($stubs as $stub) { + if ($stub===false) + continue; + # $stored should provide base path to a BLOB + $url = $stored."/".$stub->getHash(); + $dbw->update( 'text', + array( /* SET */ + 'old_text' => $url, + 'old_flags' => 'external,utf-8', + ), array ( /* WHERE */ + 'old_id' => $stub->getReferrer(), + ) + ); + } + } else { + # Store the main object locally + $dbw->update( 'text', + array( /* SET */ + 'old_text' => serialize( $chunk ), + 'old_flags' => 'object,utf-8', + ), array( /* WHERE */ + 'old_id' => $primaryOldid + ) + ); + + # Store the stub objects + for ( $j = 1; $j < $thisChunkSize; $j++ ) { + # Skip if not compressing + if ( $stubs[$j] !== false ) { + $dbw->update( 'text', + array( /* SET */ + 'old_text' => serialize($stubs[$j]), + 'old_flags' => 'object,utf-8', + ), array( /* WHERE */ + 'old_id' => $revs[$i + $j]->rev_text_id + ) + ); + } + } + } + } + # Done, next + print "/"; + $dbw->commit(); + $i += $thisChunkSize; + wfWaitForSlaves( 5 ); + } + print "\n"; + } + return true; +} +?> diff --git a/maintenance/storage/compressOld.php b/maintenance/storage/compressOld.php new file mode 100644 index 00000000..d597f1df --- /dev/null +++ b/maintenance/storage/compressOld.php @@ -0,0 +1,82 @@ +<?php +/** + * Compress the text of a wiki + * + * @package MediaWiki + * @subpackage Maintenance + */ + +/** */ + +/** + * Usage: + * + * Non-wikimedia + * php compressOld.php [options...] + * + * Wikimedia + * php compressOld.php <database> [options...] 
+ * + * Options are: + * -t <type> set compression type to either: + * gzip: compress revisions independently + * concat: concatenate revisions and compress in chunks (default) + * -c <chunk-size> maximum number of revisions in a concat chunk + * -b <begin-date> earliest date to check for uncompressed revisions + * -e <end-date> latest revision date to compress + * -s <start-id> the old_id to start from + * -f <max-factor> the maximum ratio of compressed chunk bytes to uncompressed avg. revision bytes + * -h <threshold> is a minimum number of KB, where <max-factor> cuts in + * --extdb <cluster> store specified revisions in an external cluster (untested) + * + */ + +$optionsWithArgs = array( 't', 'c', 's', 'f', 'h', 'extdb', 'endid' ); +require_once( "../commandLine.inc" ); +require_once( "compressOld.inc" ); + +if( !function_exists( "gzdeflate" ) ) { + print "You must enable zlib support in PHP to compress old revisions!\n"; + print "Please see http://www.php.net/manual/en/ref.zlib.php\n\n"; + wfDie(); +} + +$defaults = array( + 't' => 'concat', + 'c' => 20, + 's' => 0, + 'f' => 5, + 'h' => 100, + 'b' => '', + 'e' => '', + 'extdb' => '', + 'endid' => false, +); + +$options = $options + $defaults; + +if ( $options['t'] != 'concat' && $options['t'] != 'gzip' ) { + print "Type \"{$options['t']}\" not supported\n"; +} + +if ( $options['extdb'] != '' ) { + print "Compressing database $wgDBname to external cluster {$options['extdb']}\n" . str_repeat('-', 76) . "\n\n"; +} else { + print "Compressing database $wgDBname\n" . str_repeat('-', 76) . 
"\n\n"; +} + +$success = true; +if ( $options['t'] == 'concat' ) { + $success = compressWithConcat( $options['s'], $options['c'], $options['f'], $options['h'], $options['b'], + $options['e'], $options['extdb'], $options['endid'] ); +} else { + compressOldPages( $options['s'], $options['extdb'] ); +} + +if ( $success ) { + print "Done.\n"; +} + +exit(); + +?> diff --git a/maintenance/storage/dumpRev.php b/maintenance/storage/dumpRev.php new file mode 100644 index 00000000..4d0ccb58 --- /dev/null +++ b/maintenance/storage/dumpRev.php @@ -0,0 +1,14 @@ +<?php + +require_once( 'commandLine.inc' ); +$dbr =& wfGetDB( DB_SLAVE ); +$row = $dbr->selectRow( 'old', array( 'old_flags', 'old_text' ), array( 'old_id' => $args[0] ) ); +$obj = unserialize( $row->old_text ); + +if ( get_class( $obj ) == 'concatenatedgziphistoryblob' ) { + print_r( array_keys( $obj->mItems ) ); +} else { + var_dump( $obj ); +} + +?> diff --git a/maintenance/storage/make-blobs b/maintenance/storage/make-blobs new file mode 100755 index 00000000..9eb7e83e --- /dev/null +++ b/maintenance/storage/make-blobs @@ -0,0 +1,11 @@ +#!/bin/bash + +if [ X$2 == X ];then + echo 'Usage: make-blobs <server> <db>' + exit 1 +fi + +echo "CREATE DATABASE $2" | mysql -u wikiadmin -p`wikiadmin_pass` -h $1 && \ +mysql -u wikiadmin -p`wikiadmin_pass` -h $1 $2 < blobs.sql + + diff --git a/maintenance/storage/moveToExternal.php b/maintenance/storage/moveToExternal.php new file mode 100644 index 00000000..0b46f70b --- /dev/null +++ b/maintenance/storage/moveToExternal.php @@ -0,0 +1,97 @@ +<?php + +define( 'REPORTING_INTERVAL', 100 ); + +if ( !defined( 'MEDIAWIKI' ) ) { + $optionsWithArgs = array( 'm' ); + + require_once( '../commandLine.inc' ); + require_once( 'ExternalStoreDB.php' ); + require_once( 'resolveStubs.php' ); + + $fname = 'moveToExternal'; + + if ( !isset( $args[0] ) ) { + print "Usage: php moveToExternal.php [-m <maxid>] <cluster>\n"; + exit; + } + + $cluster = $args[0]; + $dbw =& wfGetDB( DB_MASTER ); + + if ( 
isset( $options['m'] ) ) { + $maxID = $options['m']; + } else { + $maxID = $dbw->selectField( 'text', 'MAX(old_id)', false, $fname ); + } + + moveToExternal( $cluster, $maxID ); +} + + + +function moveToExternal( $cluster, $maxID ) { + $fname = 'moveToExternal'; + $dbw =& wfGetDB( DB_MASTER ); + + print "Moving $maxID text rows to external storage\n"; + $ext = new ExternalStoreDB; + for ( $id = 1; $id <= $maxID; $id++ ) { + if ( !($id % REPORTING_INTERVAL) ) { + print "$id\n"; + wfWaitForSlaves( 5 ); + } + $row = $dbw->selectRow( 'text', array( 'old_flags', 'old_text' ), + array( + 'old_id' => $id, + "old_flags NOT LIKE '%external%'", + ), $fname ); + if ( !$row ) { + # Non-existent or already done + continue; + } + + # Resolve stubs + $text = $row->old_text; + if ( $row->old_flags === '' ) { + $flags = 'external'; + } else { + $flags = "{$row->old_flags},external"; + } + + if ( strpos( $flags, 'object' ) !== false ) { + $obj = unserialize( $text ); + $className = strtolower( get_class( $obj ) ); + if ( $className == 'historyblobstub' ) { + resolveStub( $id, $row->old_text, $row->old_flags ); + continue; + } elseif ( $className == 'historyblobcurstub' ) { + $text = gzdeflate( $obj->getText() ); + $flags = 'utf-8,gzip,external'; + } elseif ( $className == 'concatenatedgziphistoryblob' ) { + // Do nothing + } else { + print "Warning: unrecognised object class \"$className\"\n"; + continue; + } + } + + if ( strlen( $text ) < 100 ) { + // Don't move tiny revisions + continue; + } + + #print "Storing " . strlen( $text ) . 
" bytes to $url\n"; + + $url = $ext->store( $cluster, $text ); + if ( !$url ) { + print "Error writing to external storage\n"; + exit; + } + $dbw->update( 'text', + array( 'old_flags' => $flags, 'old_text' => $url ), + array( 'old_id' => $id ), $fname ); + } +} + +?> diff --git a/maintenance/storage/resolveStubs.php b/maintenance/storage/resolveStubs.php new file mode 100644 index 00000000..e93d5c97 --- /dev/null +++ b/maintenance/storage/resolveStubs.php @@ -0,0 +1,100 @@ +<?php + +define( 'REPORTING_INTERVAL', 100 ); + +if ( !defined( 'MEDIAWIKI' ) ) { + $optionsWithArgs = array( 'm' ); + + require_once( '../commandLine.inc' ); + require_once( 'includes/ExternalStoreDB.php' ); + + resolveStubs(); +} + +/** + * Convert history stubs that point to an external row to direct + * external pointers + */ +function resolveStubs() { + $fname = 'resolveStubs'; + + $dbr =& wfGetDB( DB_SLAVE ); + $dbw =& wfGetDB( DB_MASTER ); + $maxID = $dbr->selectField( 'text', 'MAX(old_id)', false, $fname ); + $blockSize = 10000; + $numBlocks = intval( $maxID / $blockSize ) + 1; + + for ( $b = 0; $b < $numBlocks; $b++ ) { + wfWaitForSlaves( 5 ); + + printf( "%5.2f%%\n", $b / $numBlocks * 100 ); + $start = intval($maxID / $numBlocks) * $b + 1; + $end = intval($maxID / $numBlocks) * ($b + 1); + $stubs = array(); + $flagsArray = array(); + + + $res = $dbr->select( 'text', array( 'old_id', 'old_text', 'old_flags' ), + "old_id>=$start AND old_id<=$end " . + # Using a more restrictive flag set for now, until I do some more analysis -- TS + #"AND old_flags LIKE '%object%' AND old_flags NOT LIKE '%external%' ". + + "AND old_flags='object' " . 
+ "AND old_text LIKE 'O:15:\"historyblobstub\"%'", $fname ); + while ( $row = $dbr->fetchObject( $res ) ) { + resolveStub( $row->old_id, $row->old_text, $row->old_flags ); + } + $dbr->freeResult( $res ); + + + } + print "100%\n"; +} + +/** + * Resolve a history stub + */ +function resolveStub( $id, $stubText, $flags ) { + $fname = 'resolveStub'; + + $stub = unserialize( $stubText ); + $flags = explode( ',', $flags ); + + $dbr =& wfGetDB( DB_SLAVE ); + $dbw =& wfGetDB( DB_MASTER ); + + if ( strtolower( get_class( $stub ) ) !== 'historyblobstub' ) { + print "Error found object of class " . get_class( $stub ) . ", expecting historyblobstub\n"; + return; + } + + # Get the (maybe) external row + $externalRow = $dbr->selectRow( 'text', array( 'old_text' ), + array( 'old_id' => $stub->mOldId, "old_flags LIKE '%external%'" ), + $fname + ); + + if ( !$externalRow ) { + # Object wasn't external + return; + } + + # Preserve the legacy encoding flag, but switch from object to external + if ( in_array( 'utf-8', $flags ) ) { + $newFlags = 'external,utf-8'; + } else { + $newFlags = 'external'; + } + + # Update the row + $dbw->update( 'text', + array( /* SET */ + 'old_flags' => $newFlags, + 'old_text' => $externalRow->old_text . '/' . $stub->mHash + ), + array( /* WHERE */ + 'old_id' => $id + ), $fname + ); +} +?> diff --git a/maintenance/tables.sql b/maintenance/tables.sql new file mode 100644 index 00000000..288d4a06 --- /dev/null +++ b/maintenance/tables.sql @@ -0,0 +1,998 @@ +-- SQL to create the initial tables for the MediaWiki database. +-- This is read and executed by the install script; you should +-- not have to run it by itself unless doing a manual install. + +-- +-- General notes: +-- +-- If possible, create tables as InnoDB to benefit from the +-- superior resiliency against crashes and ability to read +-- during writes (and write during reads!) 
+-- +-- Only the 'searchindex' table requires MyISAM due to the +-- requirement for fulltext index support, which is missing +-- from InnoDB. +-- +-- +-- The MySQL table backend for MediaWiki currently uses +-- 14-character CHAR or VARCHAR fields to store timestamps. +-- The format is YYYYMMDDHHMMSS, which is derived from the +-- text format of MySQL's TIMESTAMP fields. +-- +-- Historically TIMESTAMP fields were used, but abandoned +-- in early 2002 after a lot of trouble with the fields +-- auto-updating. +-- +-- The PostgreSQL backend uses DATETIME fields for timestamps, +-- and we will migrate the MySQL definitions at some point as +-- well. +-- +-- +-- The /*$wgDBprefix*/ comments in this and other files are +-- replaced with the defined table prefix by the installer +-- and updater scripts. If you are installing or running +-- updates manually, you will need to manually insert the +-- table prefix if any when running these scripts. +-- + + +-- +-- The user table contains basic account information, +-- authentication keys, etc. +-- +-- Some multi-wiki sites may share a single central user table +-- between separate wikis using the $wgSharedDB setting. +-- +-- Note that when an external authentication plugin is used, +-- user table entries still need to be created to store +-- preferences and to key tracking information in the other +-- tables. +-- +CREATE TABLE /*$wgDBprefix*/user ( + user_id int(5) unsigned NOT NULL auto_increment, + + -- Usernames must be unique, must not be in the form of + -- an IP address. _Shouldn't_ allow slashes or case + -- conflicts. Spaces are allowed, and are _not_ converted + -- to underscores like titles. See the User::newFromName() for + -- the specific tests that usernames have to pass.
+ user_name varchar(255) binary NOT NULL default '', + + -- Optional 'real name' to be displayed in credit listings + user_real_name varchar(255) binary NOT NULL default '', + + -- Password hashes, normally hashed like so: + -- MD5(CONCAT(user_id,'-',MD5(plaintext_password))), see + -- wfEncryptPassword() in GlobalFunctions.php + user_password tinyblob NOT NULL default '', + + -- When using 'mail me a new password', a random + -- password is generated and the hash stored here. + -- The previous password is left in place until + -- someone actually logs in with the new password, + -- at which point the hash is moved to user_password + -- and the old password is invalidated. + user_newpassword tinyblob NOT NULL default '', + + -- Note: email should be restricted, not public info. + -- Same with passwords. + user_email tinytext NOT NULL default '', + + -- Newline-separated list of name=value defining the user + -- preferences + user_options blob NOT NULL default '', + + -- This is a timestamp which is updated when a user + -- logs in, logs out, changes preferences, or performs + -- some other action requiring HTML cache invalidation + -- to ensure that the UI is updated. + user_touched char(14) binary NOT NULL default '', + + -- A pseudorandomly generated value that is stored in + -- a cookie when the "remember password" feature is + -- used (previously, a hash of the password was used, but + -- this was vulnerable to cookie-stealing attacks) + user_token char(32) binary NOT NULL default '', + + -- Initially NULL; when a user's e-mail address has been + -- validated by returning with a mailed token, this is + -- set to the current timestamp. + user_email_authenticated CHAR(14) BINARY, + + -- Randomly generated token created when the e-mail address + -- is set and a confirmation test mail sent. + user_email_token CHAR(32) BINARY, + + -- Expiration date for the user_email_token + user_email_token_expires CHAR(14) BINARY, + + -- Timestamp of account registration. 
+ -- Accounts predating this schema addition may contain NULL. + user_registration CHAR(14) BINARY, + + PRIMARY KEY user_id (user_id), + UNIQUE INDEX user_name (user_name), + INDEX (user_email_token) + +) TYPE=InnoDB; + +-- +-- User permissions have been broken out to a separate table; +-- this allows sites with a shared user table to have different +-- permissions assigned to a user in each project. +-- +-- This table replaces the old user_rights field which used a +-- comma-separated blob. +-- +CREATE TABLE /*$wgDBprefix*/user_groups ( + -- Key to user_id + ug_user int(5) unsigned NOT NULL default '0', + + -- Group names are short symbolic string keys. + -- The set of group names is open-ended, though in practice + -- only some predefined ones are likely to be used. + -- + -- At runtime $wgGroupPermissions will associate group keys + -- with particular permissions. A user will have the combined + -- permissions of any group they're explicitly in, plus + -- the implicit '*' and 'user' groups. + ug_group char(16) NOT NULL default '', + + PRIMARY KEY (ug_user,ug_group), + KEY (ug_group) +) TYPE=InnoDB; + +-- Stores notifications of user talk page changes, for the display +-- of the "you have new messages" box +CREATE TABLE /*$wgDBprefix*/user_newtalk ( + -- Key to user.user_id + user_id int(5) NOT NULL default '0', + -- If the user is an anonymous user hir IP address is stored here + -- since the user_id of 0 is ambiguous + user_ip varchar(40) NOT NULL default '', + INDEX user_id (user_id), + INDEX user_ip (user_ip) +); + + +-- +-- Core of the wiki: each page has an entry here which identifies +-- it by title and contains some essential metadata. +-- +CREATE TABLE /*$wgDBprefix*/page ( + -- Unique identifier number. The page_id will be preserved across + -- edits and rename operations, but not deletions and recreations. + page_id int(8) unsigned NOT NULL auto_increment, + + -- A page name is broken into a namespace and a title. 
+ -- The namespace keys are UI-language-independent constants, + -- defined in includes/Defines.php + page_namespace int NOT NULL, + + -- The rest of the title, as text. + -- Spaces are transformed into underscores in title storage. + page_title varchar(255) binary NOT NULL, + + -- Comma-separated set of permission keys indicating who + -- can move or edit the page. + page_restrictions tinyblob NOT NULL default '', + + -- Number of times this page has been viewed. + page_counter bigint(20) unsigned NOT NULL default '0', + + -- 1 indicates the article is a redirect. + page_is_redirect tinyint(1) unsigned NOT NULL default '0', + + -- 1 indicates this is a new entry, with only one edit. + -- Not all pages with one edit are new pages. + page_is_new tinyint(1) unsigned NOT NULL default '0', + + -- Random value between 0 and 1, used for Special:Randompage + page_random real unsigned NOT NULL, + + -- This timestamp is updated whenever the page changes in + -- a way requiring it to be re-rendered, invalidating caches. + -- Aside from editing this includes permission changes, + -- creation or deletion of linked pages, and alteration + -- of contained templates. + page_touched char(14) binary NOT NULL default '', + + -- Handy key to revision.rev_id of the current revision. + -- This may be 0 during page creation, but that shouldn't + -- happen outside of a transaction... hopefully. + page_latest int(8) unsigned NOT NULL, + + -- Uncompressed length in bytes of the page's current source text. + page_len int(8) unsigned NOT NULL, + + PRIMARY KEY page_id (page_id), + UNIQUE INDEX name_title (page_namespace,page_title), + + -- Special-purpose indexes + INDEX (page_random), + INDEX (page_len) + +) TYPE=InnoDB; + +-- +-- Every edit of a page also creates a revision row. +-- This stores metadata about the revision, and a reference +-- to the text storage backend. +-- +CREATE TABLE /*$wgDBprefix*/revision ( + rev_id int(8) unsigned NOT NULL auto_increment, + + -- Key to page_id.
This should _never_ be invalid. + rev_page int(8) unsigned NOT NULL, + + -- Key to text.old_id, where the actual bulk text is stored. + -- It's possible for multiple revisions to use the same text, + -- for instance revisions where only metadata is altered + -- or a rollback to a previous version. + rev_text_id int(8) unsigned NOT NULL, + + -- Text comment summarizing the change. + -- This text is shown in the history and other changes lists, + -- rendered in a subset of wiki markup by Linker::formatComment() + rev_comment tinyblob NOT NULL default '', + + -- Key to user.user_id of the user who made this edit. + -- Stores 0 for anonymous edits and for some mass imports. + rev_user int(5) unsigned NOT NULL default '0', + + -- Text username or IP address of the editor. + rev_user_text varchar(255) binary NOT NULL default '', + + -- Timestamp + rev_timestamp char(14) binary NOT NULL default '', + + -- Records whether the user marked the 'minor edit' checkbox. + -- Many automated edits are marked as minor. + rev_minor_edit tinyint(1) unsigned NOT NULL default '0', + + -- Not yet used; reserved for future changes to the deletion system. + rev_deleted tinyint(1) unsigned NOT NULL default '0', + + PRIMARY KEY rev_page_id (rev_page, rev_id), + UNIQUE INDEX rev_id (rev_id), + INDEX rev_timestamp (rev_timestamp), + INDEX page_timestamp (rev_page,rev_timestamp), + INDEX user_timestamp (rev_user,rev_timestamp), + INDEX usertext_timestamp (rev_user_text,rev_timestamp) + +) TYPE=InnoDB; + + +-- +-- Holds text of individual page revisions. +-- +-- Field names are a holdover from the 'old' revisions table in +-- MediaWiki 1.4 and earlier: an upgrade will transform that +-- table into the 'text' table to minimize unnecessary churning +-- and downtime. If upgrading, the other fields will be left unused. +-- +CREATE TABLE /*$wgDBprefix*/text ( + -- Unique text storage key number. 
+ -- Note that the 'oldid' parameter used in URLs does *not* + -- refer to this number anymore, but to rev_id. + -- + -- revision.rev_text_id is a key to this column + old_id int(8) unsigned NOT NULL auto_increment, + + -- Depending on the contents of the old_flags field, the text + -- may be convenient plain text, or it may be funkily encoded. + old_text mediumblob NOT NULL default '', + + -- Comma-separated list of flags: + -- gzip: text is compressed with PHP's gzdeflate() function. + -- utf-8: text was stored as UTF-8. + -- If $wgLegacyEncoding option is on, rows *without* this flag + -- will be converted to UTF-8 transparently at load time. + -- object: text field contained a serialized PHP object. + -- The object either contains multiple versions compressed + -- together to achieve a better compression ratio, or it refers + -- to another row where the text can be found. + old_flags tinyblob NOT NULL default '', + + PRIMARY KEY old_id (old_id) + +) TYPE=InnoDB; + +-- +-- Holding area for deleted articles, which may be viewed +-- or restored by admins through the Special:Undelete interface. +-- The fields generally correspond to the page, revision, and text +-- fields, with several caveats. +-- +CREATE TABLE /*$wgDBprefix*/archive ( + ar_namespace int NOT NULL default '0', + ar_title varchar(255) binary NOT NULL default '', + + -- Newly deleted pages will not store text in this table, + -- but will reference the separately existing text rows. + -- This field is retained for backwards compatibility, + -- so old archived pages will remain accessible after + -- upgrading from 1.4 to 1.5. + -- Text may be gzipped or otherwise funky. + ar_text mediumblob NOT NULL default '', + + -- Basic revision stuff... + ar_comment tinyblob NOT NULL default '', + ar_user int(5) unsigned NOT NULL default '0', + ar_user_text varchar(255) binary NOT NULL, + ar_timestamp char(14) binary NOT NULL default '', + ar_minor_edit tinyint(1) NOT NULL default '0', + + -- See ar_text note.
+ ar_flags tinyblob NOT NULL default '', + + -- When revisions are deleted, their unique rev_id is stored + -- here so it can be retained after undeletion. This is necessary + -- to retain permalinks to given revisions after accidental delete + -- cycles or messy operations like history merges. + -- + -- Old entries from 1.4 will be NULL here, and a new rev_id will + -- be created on undeletion for those revisions. + ar_rev_id int(8) unsigned, + + -- For newly deleted revisions, this is the text.old_id key to the + -- actual stored text. To avoid breaking the block-compression scheme + -- and otherwise making storage changes harder, the actual text is + -- *not* deleted from the text table, merely hidden by removal of the + -- page and revision entries. + -- + -- Old entries deleted under 1.2-1.4 will have NULL here, and their + -- ar_text and ar_flags fields will be used to create a new text + -- row upon undeletion. + ar_text_id int(8) unsigned, + + KEY name_title_timestamp (ar_namespace,ar_title,ar_timestamp) + +) TYPE=InnoDB; + + +-- +-- Track page-to-page hyperlinks within the wiki. +-- +CREATE TABLE /*$wgDBprefix*/pagelinks ( + -- Key to the page_id of the page containing the link. + pl_from int(8) unsigned NOT NULL default '0', + + -- Key to page_namespace/page_title of the target page. + -- The target page may or may not exist, and due to renames + -- and deletions may refer to different page records as time + -- goes by. + pl_namespace int NOT NULL default '0', + pl_title varchar(255) binary NOT NULL default '', + + UNIQUE KEY pl_from(pl_from,pl_namespace,pl_title), + KEY (pl_namespace,pl_title) + +) TYPE=InnoDB; + + +-- +-- Track template inclusions. +-- +CREATE TABLE /*$wgDBprefix*/templatelinks ( + -- Key to the page_id of the page containing the link. + tl_from int(8) unsigned NOT NULL default '0', + + -- Key to page_namespace/page_title of the target page. 
+ -- The target page may or may not exist, and due to renames + -- and deletions may refer to different page records as time + -- goes by. + tl_namespace int NOT NULL default '0', + tl_title varchar(255) binary NOT NULL default '', + + UNIQUE KEY tl_from(tl_from,tl_namespace,tl_title), + KEY (tl_namespace,tl_title) + +) TYPE=InnoDB; + +-- +-- Track links to images *used inline* +-- We don't distinguish live from broken links here, so +-- they do not need to be changed on upload/removal. +-- +CREATE TABLE /*$wgDBprefix*/imagelinks ( + -- Key to page_id of the page containing the image / media link. + il_from int(8) unsigned NOT NULL default '0', + + -- Filename of target image. + -- This is also the page_title of the file's description page; + -- all such pages are in namespace 6 (NS_IMAGE). + il_to varchar(255) binary NOT NULL default '', + + UNIQUE KEY il_from(il_from,il_to), + KEY (il_to) + +) TYPE=InnoDB; + +-- +-- Track category inclusions *used inline* +-- This tracks a single level of category membership +-- (folksonomic tagging, really). +-- +CREATE TABLE /*$wgDBprefix*/categorylinks ( + -- Key to page_id of the page defined as a category member. + cl_from int(8) unsigned NOT NULL default '0', + + -- Name of the category. + -- This is also the page_title of the category's description page; + -- all such pages are in namespace 14 (NS_CATEGORY). + cl_to varchar(255) binary NOT NULL default '', + + -- The title of the linking page, or an optional override + -- to determine sort order. Sorting is by binary order, which + -- isn't always ideal, but collations seem to be an exciting + -- and dangerous new world in MySQL... The sortkey is updated + -- if no override exists and cl_from is renamed. + -- + -- For MySQL 4.1+ with charset set to utf8, the sort key *index* + -- needs cut to be smaller than 1024 bytes (at 3 bytes per char). + -- To sort properly on the shorter key, this field needs to be + -- the same shortness. 
+ cl_sortkey varchar(86) binary NOT NULL default '', + + -- This isn't really used at present. Provided for an optional + -- sorting method by approximate addition time. + cl_timestamp timestamp NOT NULL, + + UNIQUE KEY cl_from(cl_from,cl_to), + + -- We always sort within a given category... + KEY cl_sortkey(cl_to,cl_sortkey), + + -- Not really used? + KEY cl_timestamp(cl_to,cl_timestamp) + +) TYPE=InnoDB; + +-- +-- Track links to external URLs +-- +CREATE TABLE /*$wgDBprefix*/externallinks ( + -- page_id of the referring page + el_from int(8) unsigned NOT NULL default '0', + + -- The URL + el_to blob NOT NULL default '', + + -- In the case of HTTP URLs, this is the URL with any username or password + -- removed, and with the labels in the hostname reversed and converted to + -- lower case. An extra dot is added to allow for matching of either + -- example.com or *.example.com in a single scan. + -- Example: + -- http://user:password@sub.example.com/page.html + -- becomes + -- http://com.example.sub./page.html + -- which allows for fast searching for all pages under example.com with the + -- clause: + -- WHERE el_index LIKE 'http://com.example.%' + el_index blob NOT NULL default '', + + KEY (el_from, el_to(40)), + KEY (el_to(60), el_from), + KEY (el_index(60)) +) TYPE=InnoDB; + +-- +-- Track interlanguage links +-- +CREATE TABLE /*$wgDBprefix*/langlinks ( + -- page_id of the referring page + ll_from int(8) unsigned NOT NULL default '0', + + -- Language code of the target + ll_lang varchar(10) binary NOT NULL default '', + + -- Title of the target, including namespace + ll_title varchar(255) binary NOT NULL default '', + + UNIQUE KEY (ll_from, ll_lang), + KEY (ll_lang, ll_title) +) TYPE=InnoDB; + +-- +-- Contains a single row with some aggregate info +-- on the state of the site. +-- +CREATE TABLE /*$wgDBprefix*/site_stats ( + -- The single row should contain 1 here. 
+ ss_row_id int(8) unsigned NOT NULL, + + -- Total number of page views, if hit counters are enabled. + ss_total_views bigint(20) unsigned default '0', + + -- Total number of edits performed. + ss_total_edits bigint(20) unsigned default '0', + + -- An approximate count of pages matching the following criteria: + -- * in namespace 0 + -- * not a redirect + -- * contains the text '[[' + -- See Article::isCountable() in includes/Article.php + ss_good_articles bigint(20) unsigned default '0', + + -- Total pages, theoretically equal to SELECT COUNT(*) FROM page; except faster + ss_total_pages bigint(20) default '-1', + + -- Number of users, theoretically equal to SELECT COUNT(*) FROM user; + ss_users bigint(20) default '-1', + + -- Deprecated, no longer updated as of 1.5 + ss_admins int(10) default '-1', + + -- Number of images, equivalent to SELECT COUNT(*) FROM image + ss_images int(10) default '0', + + UNIQUE KEY ss_row_id (ss_row_id) + +) TYPE=InnoDB; + +-- +-- Stores an ID for every time any article is visited; +-- depending on $wgHitcounterUpdateFreq, it is +-- periodically cleared and the page_counter column +-- in the page table is updated for all the articles +-- that have been visited. +-- +CREATE TABLE /*$wgDBprefix*/hitcounter ( + hc_id INTEGER UNSIGNED NOT NULL +) TYPE=HEAP MAX_ROWS=25000; + + +-- +-- The internet is full of jerks, alas. Sometimes it's handy +-- to block a vandal or troll account. +-- +CREATE TABLE /*$wgDBprefix*/ipblocks ( + -- Primary key, introduced for privacy. + ipb_id int(8) NOT NULL auto_increment, + + -- Blocked IP address in dotted-quad form or user name. + ipb_address varchar(40) binary NOT NULL default '', + + -- Blocked user ID or 0 for IP blocks. + ipb_user int(8) unsigned NOT NULL default '0', + + -- User ID who made the block. + ipb_by int(8) unsigned NOT NULL default '0', + + -- Text comment made by blocker. + ipb_reason tinyblob NOT NULL default '', + + -- Creation (or refresh) date in standard YMDHMS form.
+ -- IP blocks expire automatically. + ipb_timestamp char(14) binary NOT NULL default '', + + -- Indicates that the IP address was banned because a banned + -- user accessed a page through it. If this is 1, ipb_address + -- will be hidden, and the block identified by block ID number. + ipb_auto tinyint(1) NOT NULL default '0', + + -- Time at which the block will expire. + ipb_expiry char(14) binary NOT NULL default '', + + -- Start and end of an address range, in hexadecimal + -- Size chosen to allow IPv6 + ipb_range_start varchar(32) NOT NULL default '', + ipb_range_end varchar(32) NOT NULL default '', + + PRIMARY KEY ipb_id (ipb_id), + INDEX ipb_address (ipb_address), + INDEX ipb_user (ipb_user), + INDEX ipb_range (ipb_range_start(8), ipb_range_end(8)) + +) TYPE=InnoDB; + + +-- +-- Uploaded images and other files. +-- +CREATE TABLE /*$wgDBprefix*/image ( + -- Filename. + -- This is also the title of the associated description page, + -- which will be in namespace 6 (NS_IMAGE). + img_name varchar(255) binary NOT NULL default '', + + -- File size in bytes. + img_size int(8) unsigned NOT NULL default '0', + + -- For images, size in pixels. + img_width int(5) NOT NULL default '0', + img_height int(5) NOT NULL default '0', + + -- Extracted EXIF metadata stored as a serialized PHP array. + img_metadata mediumblob NOT NULL, + + -- For images, bits per pixel if known. 
+ img_bits int(3) NOT NULL default '0', + + -- Media type as defined by the MEDIATYPE_xxx constants + img_media_type ENUM("UNKNOWN", "BITMAP", "DRAWING", "AUDIO", "VIDEO", "MULTIMEDIA", "OFFICE", "TEXT", "EXECUTABLE", "ARCHIVE") default NULL, + + -- major part of a MIME media type as defined by IANA + -- see http://www.iana.org/assignments/media-types/ + img_major_mime ENUM("unknown", "application", "audio", "image", "text", "video", "message", "model", "multipart") NOT NULL default "unknown", + + -- minor part of a MIME media type as defined by IANA + -- the minor parts are not required to adhere to any standard + -- but should be consistent throughout the database + -- see http://www.iana.org/assignments/media-types/ + img_minor_mime varchar(32) NOT NULL default "unknown", + + -- Description field as entered by the uploader. + -- This is displayed in image upload history and logs. + img_description tinyblob NOT NULL default '', + + -- user_id and user_name of uploader. + img_user int(5) unsigned NOT NULL default '0', + img_user_text varchar(255) binary NOT NULL default '', + + -- Time of the upload. + img_timestamp char(14) binary NOT NULL default '', + + PRIMARY KEY img_name (img_name), + + -- Used by Special:Imagelist for sort-by-size + INDEX img_size (img_size), + + -- Used by Special:Newimages and Special:Imagelist + INDEX img_timestamp (img_timestamp) + +) TYPE=InnoDB; + +-- +-- Previous revisions of uploaded files. +-- Awkwardly, image rows have to be moved into +-- this table at re-upload time. +-- +CREATE TABLE /*$wgDBprefix*/oldimage ( + -- Base filename: key to image.img_name + oi_name varchar(255) binary NOT NULL default '', + + -- Filename of the archived file. + -- This is generally a timestamp and '!' prepended to the base name. + oi_archive_name varchar(255) binary NOT NULL default '', + + -- Other fields as in image...
+ oi_size int(8) unsigned NOT NULL default 0, + oi_width int(5) NOT NULL default 0, + oi_height int(5) NOT NULL default 0, + oi_bits int(3) NOT NULL default 0, + oi_description tinyblob NOT NULL default '', + oi_user int(5) unsigned NOT NULL default '0', + oi_user_text varchar(255) binary NOT NULL default '', + oi_timestamp char(14) binary NOT NULL default '', + + INDEX oi_name (oi_name(10)) + +) TYPE=InnoDB; + +-- +-- Record of deleted file data +-- +CREATE TABLE /*$wgDBprefix*/filearchive ( + -- Unique row id + fa_id int not null auto_increment, + + -- Original base filename; key to image.img_name, page.page_title, etc + fa_name varchar(255) binary NOT NULL default '', + + -- Filename of archived file, if an old revision + fa_archive_name varchar(255) binary default '', + + -- Which storage bin (directory tree or object store) the file data + -- is stored in. Should be 'deleted' for files that have been deleted; + -- any other bin is not yet in use. + fa_storage_group varchar(16), + + -- SHA-1 of the file contents plus extension, used as a key for storage. + -- eg 8f8a562add37052a1848ff7771a2c515db94baa9.jpg + -- + -- If NULL, the file was missing at deletion time or has been purged + -- from the archival storage. + fa_storage_key varchar(64) binary default '', + + -- Deletion information, if this file is deleted. 
+ fa_deleted_user int, + fa_deleted_timestamp char(14) binary default '', + fa_deleted_reason text, + + -- Duped fields from image + fa_size int(8) unsigned default '0', + fa_width int(5) default '0', + fa_height int(5) default '0', + fa_metadata mediumblob, + fa_bits int(3) default '0', + fa_media_type ENUM("UNKNOWN", "BITMAP", "DRAWING", "AUDIO", "VIDEO", "MULTIMEDIA", "OFFICE", "TEXT", "EXECUTABLE", "ARCHIVE") default NULL, + fa_major_mime ENUM("unknown", "application", "audio", "image", "text", "video", "message", "model", "multipart") default "unknown", + fa_minor_mime varchar(32) default "unknown", + fa_description tinyblob default '', + fa_user int(5) unsigned default '0', + fa_user_text varchar(255) binary default '', + fa_timestamp char(14) binary default '', + + PRIMARY KEY (fa_id), + INDEX (fa_name, fa_timestamp), -- pick out by image name + INDEX (fa_storage_group, fa_storage_key), -- pick out dupe files + INDEX (fa_deleted_timestamp), -- sort by deletion time + INDEX (fa_deleted_user) -- sort by deleter + +) TYPE=InnoDB; + +-- +-- Primarily a summary table for Special:Recentchanges, +-- this table contains some additional info on edits from +-- the last few days, see Article::editUpdates() +-- +CREATE TABLE /*$wgDBprefix*/recentchanges ( + rc_id int(8) NOT NULL auto_increment, + rc_timestamp varchar(14) binary NOT NULL default '', + rc_cur_time varchar(14) binary NOT NULL default '', + + -- As in revision + rc_user int(10) unsigned NOT NULL default '0', + rc_user_text varchar(255) binary NOT NULL default '', + + -- When pages are renamed, their RC entries do _not_ change. + rc_namespace int NOT NULL default '0', + rc_title varchar(255) binary NOT NULL default '', + + -- as in revision... + rc_comment varchar(255) binary NOT NULL default '', + rc_minor tinyint(3) unsigned NOT NULL default '0', + + -- Edits by user accounts with the 'bot' rights key are + -- marked with a 1 here, and will be hidden from the + -- default view. 
+ rc_bot tinyint(3) unsigned NOT NULL default '0', + + rc_new tinyint(3) unsigned NOT NULL default '0', + + -- Key to page_id (was cur_id prior to 1.5). + -- This will keep links working after moves while + -- retaining the at-the-time name in the changes list. + rc_cur_id int(10) unsigned NOT NULL default '0', + + -- rev_id of the given revision + rc_this_oldid int(10) unsigned NOT NULL default '0', + + -- rev_id of the prior revision, for generating diff links. + rc_last_oldid int(10) unsigned NOT NULL default '0', + + -- These may no longer be used, with the new move log. + rc_type tinyint(3) unsigned NOT NULL default '0', + rc_moved_to_ns tinyint(3) unsigned NOT NULL default '0', + rc_moved_to_title varchar(255) binary NOT NULL default '', + + -- If the Recent Changes Patrol option is enabled, + -- users may mark edits as having been reviewed to + -- remove a warning flag on the RC list. + -- A value of 1 indicates the page has been reviewed. + rc_patrolled tinyint(3) unsigned NOT NULL default '0', + + -- Recorded IP address the edit was made from, if the + -- $wgPutIPinRC option is enabled. + rc_ip char(15) NOT NULL default '', + + PRIMARY KEY rc_id (rc_id), + INDEX rc_timestamp (rc_timestamp), + INDEX rc_namespace_title (rc_namespace, rc_title), + INDEX rc_cur_id (rc_cur_id), + INDEX new_name_timestamp(rc_new,rc_namespace,rc_timestamp), + INDEX rc_ip (rc_ip) + +) TYPE=InnoDB; + +CREATE TABLE /*$wgDBprefix*/watchlist ( + -- Key to user.user_id + wl_user int(5) unsigned NOT NULL, + + -- Key to page_namespace/page_title + -- Note that users may watch pages which do not exist yet, + -- or existed in the past but have been deleted. + wl_namespace int NOT NULL default '0', + wl_title varchar(255) binary NOT NULL default '', + + -- Timestamp when user was last sent a notification e-mail; + -- cleared when the user visits the page. 
+ wl_notificationtimestamp varchar(14) binary, + + UNIQUE KEY (wl_user, wl_namespace, wl_title), + KEY namespace_title (wl_namespace,wl_title) + +) TYPE=InnoDB; + + +-- +-- Used by the math module to keep track +-- of previously-rendered items. +-- +CREATE TABLE /*$wgDBprefix*/math ( + -- Binary MD5 hash of the latex fragment, used as an identifier key. + math_inputhash varchar(16) NOT NULL, + + -- Not sure what this is, exactly... + math_outputhash varchar(16) NOT NULL, + + -- texvc reports how well it thinks the HTML conversion worked; + -- if it's a low level the PNG rendering may be preferred. + math_html_conservativeness tinyint(1) NOT NULL, + + -- HTML output from texvc, if any + math_html text, + + -- MathML output from texvc, if any + math_mathml text, + + UNIQUE KEY math_inputhash (math_inputhash) + +) TYPE=InnoDB; + +-- +-- When using the default MySQL search backend, page titles +-- and text are munged to strip markup, do Unicode case folding, +-- and prepare the result for MySQL's fulltext index. +-- +-- This table must be MyISAM; InnoDB does not support the needed +-- fulltext index. +-- +CREATE TABLE /*$wgDBprefix*/searchindex ( + -- Key to page_id + si_page int(8) unsigned NOT NULL, + + -- Munged version of title + si_title varchar(255) NOT NULL default '', + + -- Munged version of body text + si_text mediumtext NOT NULL default '', + + UNIQUE KEY (si_page), + FULLTEXT si_title (si_title), + FULLTEXT si_text (si_text) + +) TYPE=MyISAM; + +-- +-- Recognized interwiki link prefixes +-- +CREATE TABLE /*$wgDBprefix*/interwiki ( + -- The interwiki prefix, (e.g. "Meatball", or the language prefix "de") + iw_prefix char(32) NOT NULL, + + -- The URL of the wiki, with "$1" as a placeholder for an article name. + -- Any spaces in the name will be transformed to underscores before + -- insertion. 
+ iw_url char(127) NOT NULL, + + -- A boolean value indicating whether the wiki is in this project + -- (used, for example, to detect redirect loops) + iw_local BOOL NOT NULL, + + -- Boolean value indicating whether interwiki transclusions are allowed. + iw_trans TINYINT(1) NOT NULL DEFAULT 0, + + UNIQUE KEY iw_prefix (iw_prefix) + +) TYPE=InnoDB; + +-- +-- Used for caching expensive grouped queries +-- +CREATE TABLE /*$wgDBprefix*/querycache ( + -- A key name, generally the base name of the special page. + qc_type char(32) NOT NULL, + + -- Some sort of stored value. Sizes, counts... + qc_value int(5) unsigned NOT NULL default '0', + + -- Target namespace+title + qc_namespace int NOT NULL default '0', + qc_title char(255) binary NOT NULL default '', + + KEY (qc_type,qc_value) + +) TYPE=InnoDB; + +-- +-- For a few generic cache operations if not using Memcached +-- +CREATE TABLE /*$wgDBprefix*/objectcache ( + keyname char(255) binary not null default '', + value mediumblob, + exptime datetime, + unique key (keyname), + key (exptime) + +) TYPE=InnoDB; + +-- +-- Cache of interwiki transclusion +-- +CREATE TABLE /*$wgDBprefix*/transcache ( + tc_url VARCHAR(255) NOT NULL, + tc_contents TEXT, + tc_time INT NOT NULL, + UNIQUE INDEX tc_url_idx(tc_url) +) TYPE=InnoDB; + +CREATE TABLE /*$wgDBprefix*/logging ( + -- Symbolic keys for the general log type and the action type + -- within the log. The output format will be controlled by the + -- action field, but only the type controls categorization. + log_type char(10) NOT NULL default '', + log_action char(10) NOT NULL default '', + + -- Timestamp. Duh. + log_timestamp char(14) NOT NULL default '19700101000000', + + -- The user who performed this action; key to user_id + log_user int unsigned NOT NULL default 0, + + -- Key to the page affected. Where a user is the target, + -- this will point to the user page. + log_namespace int NOT NULL default 0, + log_title varchar(255) binary NOT NULL default '', + + -- Freeform text. 
Interpreted as edit history comments. + log_comment varchar(255) NOT NULL default '', + + -- LF separated list of miscellaneous parameters + log_params blob NOT NULL default '', + + KEY type_time (log_type, log_timestamp), + KEY user_time (log_user, log_timestamp), + KEY page_time (log_namespace, log_title, log_timestamp), + KEY times (log_timestamp) + +) TYPE=InnoDB; + +CREATE TABLE /*$wgDBprefix*/trackbacks ( + tb_id integer AUTO_INCREMENT PRIMARY KEY, + tb_page integer REFERENCES page(page_id) ON DELETE CASCADE, + tb_title varchar(255) NOT NULL, + tb_url varchar(255) NOT NULL, + tb_ex text, + tb_name varchar(255), + + INDEX (tb_page) +) TYPE=InnoDB; + + +-- Jobs performed by parallel apache threads or a command-line daemon +CREATE TABLE /*$wgDBprefix*/job ( + job_id int(9) unsigned NOT NULL auto_increment, + + -- Command name, currently only refreshLinks is defined + job_cmd varchar(255) NOT NULL default '', + + -- Namespace and title to act on + -- Should be 0 and '' if the command does not operate on a title + job_namespace int NOT NULL, + job_title varchar(255) binary NOT NULL, + + -- Any other parameters to the command + -- Presently unused, format undefined + job_params blob NOT NULL default '', + + PRIMARY KEY job_id (job_id), + KEY (job_cmd, job_namespace, job_title) +) TYPE=InnoDB; + + +-- Details of updates to cached special pages +CREATE TABLE /*$wgDBprefix*/querycache_info ( + + -- Special page name + -- Corresponds to a qc_type value + qci_type varchar(32) NOT NULL default '', + + -- Timestamp of last update + qci_timestamp char(14) NOT NULL default '19700101000000', + + UNIQUE KEY ( qci_type ) + +) TYPE=InnoDB; diff --git a/maintenance/transstat.php b/maintenance/transstat.php new file mode 100644 index 00000000..e54a668c --- /dev/null +++ b/maintenance/transstat.php @@ -0,0 +1,203 @@ +<?php +/** + * @package MediaWiki + * @subpackage Maintenance + * + * @author Ævar Arnfjörð Bjarmason <avarab@gmail.com> + * @author Ashar Voultoiz 
<thoane@altern.org> + * @bug 2499 + * + * Output is posted from time to time on: + * http://meta.wikimedia.org/wiki/Localization_statistics + */ + +/** */ +require_once('commandLine.inc'); +require_once('languages.inc'); + +if( isset($options['help']) ) { usage(); wfDie(); } +// default output is WikiText +if( !isset($options['output']) ) { $options['output']='wiki'; } + + +/** Print a usage message describing the options this script reads (help, output, showold) */ +function usage() { +print <<<END +Usage: php transstat.php [--help] [--output=csv|none|text|wiki] [--showold] + --help : this helpful message + --showold : show old messages that are not in Messages.php + --output : select an output engine one of: + * 'csv' : Comma Separated Values. + * 'none' : Nothing, useful with --showold + * 'wiki' : MediaWiki syntax (default). + * 'text' : Text with tabs. +Example: php transstat.php --showold --output=none + + +END; +} + + +/** A general output object. Needs to be overridden */ +class statsOutput { + var $output; // buffer that contains the text + function statsOutput() { $this->output='';} + function getContent() { return $this->output;} + + function formatPercent($subset, $total, $revert=false, $accuracy=2) { + return @sprintf( '%.' . $accuracy . 'f%%', 100 * $subset / $total ); + } + + // Override the next methods + function heading() {} + function footer() {} + function blockstart() {} + function blockend() {} + function element($in, $heading=false) {} +} + +/** Outputs nothing ! 
*/ +class noneStatsOutput extends statsOutput { + function getContent() { return NULL;} +} + +/** Outputs WikiText */ +class wikiStatsOutput extends statsOutput { + function heading() { + $this->output .= "{| border=2 cellpadding=4 cellspacing=0 style=\"background: #f9f9f9; border: 1px #aaa solid; border-collapse: collapse;\" width=100%\n"; + } + function footer() { $this->output .= "|}\n"; } + function blockstart() { $this->output .= "|-\n"; } + function blockend() { $this->output .= ''; } + function element($in, $heading = false) { + $this->output .= ($heading ? '!' : '|') . " $in\n"; + } + function formatPercent($subset, $total, $revert=false, $accuracy=2) { + $v = @round(255 * $subset / $total); + if($revert) $v = 255 - $v; + if($v < 128) { + // red to yellow + $red = 'FF'; + $green = sprintf('%02X', 2*$v); + } else { + // yellow to green + $red = sprintf('%02X', 2*(255 -$v) ); + $green = 'FF'; + } + $blue = '00'; + $color = $red.$green.$blue; + + $percent = statsOutput::formatPercent($subset, $total, $revert, $accuracy); + return 'bgcolor="#'.$color.'" | '.$percent; + } +} + +/** Output text. To be used on a terminal for example. */ +class textStatsOutput extends statsOutput { + function element($in, $heading = false) { + $this->output .= $in."\t"; + } + function blockend(){ $this->output .="\n";} +} + +/** csv output. 
Some people love excel */ +class csvStatsOutput extends statsOutput { + function element($in, $heading = false) { + $this->output .= $in.";"; + } + function blockend(){ $this->output .="\n";} +} + + +function redundant(&$arr, $langcode) { + global $wgAllMessagesEn; + + $redundant = 0; + foreach(array_keys($arr) as $key) { + if ( @$wgAllMessagesEn[$key] === null ) { + global $options; + if( isset($options['showold']) ) { + print "Deprecated [$langcode]: $key\n"; + } + ++$redundant; + } + } + return $redundant; +} + +// Select an output engine +switch ($options['output']) { + case 'csv': + $out = new csvStatsOutput(); break; + case 'none': + $out = new noneStatsOutput(); break; + case 'text': + $out = new textStatsOutput(); break; + case 'wiki': + $out = new wikiStatsOutput(); break; + default: + usage(); wfDie(); + break; +} + +$langTool = new languages(); + +// Load message and compute stuff +$msgs = array(); +foreach($langTool->getList() as $langcode) { + // Since they aren't loaded by default.. + require_once( 'languages/Language' . $langcode . '.php' ); + $arr = 'wgAllMessages'.$langcode; + if(@is_array($$arr)) { + $msgs[$wgContLang->lcfirst($langcode)] = array( + 'total' => count($$arr), + 'redundant' => redundant($$arr, $langcode), + ); + } else { + $msgs[$wgContLang->lcfirst($langcode)] = array( + 'total' => 0, + 'redundant' => 0, + ); + } +} + +// Top entry +$out->heading(); +$out->blockstart(); +$out->element('Language', true); +$out->element('Translated', true); +$out->element('%', true); +$out->element('Untranslated', true); +$out->element('%', true); +$out->element('Redundant', true); +$out->element('%', true); +$out->blockend(); + +// Generate rows +foreach($msgs as $lang => $stats) { + $out->blockstart(); + // Language + $out->element($wgContLang->getLanguageName(strtr($lang, '_', '-')) . " ($lang)"); + // Translated + $out->element($stats['total'] . '/' . 
$msgs['en']['total']); + // % Translated + $out->element($out->formatPercent($stats['total'], $msgs['en']['total'])); + // Untranslated + $out->element($msgs['en']['total'] - $stats['total']); + // % Untranslated + $out->element($out->formatPercent($msgs['en']['total'] - $stats['total'], $msgs['en']['total'], true)); + // Redundant & % Redundant + if($stats['redundant'] =='NC') { + $out->element('NC'); + $out->element('NC'); + } else { + $out->element($stats['redundant'] . '/' . $stats['total']); + $out->element($out->formatPercent($stats['redundant'], $stats['total'],true)); + } + $out->blockend(); +} +$out->footer(); + +// Final output +echo $out->getContent(); +?> diff --git a/maintenance/trivialCmdLine.php b/maintenance/trivialCmdLine.php new file mode 100644 index 00000000..2f12815f --- /dev/null +++ b/maintenance/trivialCmdLine.php @@ -0,0 +1,21 @@ +<?php +/** + * @package MediaWiki + * @subpackage Maintenance + */ + +/** */ +require_once( "commandLine.inc" ); +# print "DB name: $wgDBname\n"; +# print "DB user: $wgDBuser\n"; +# print "DB password: $wgDBpassword\n"; + +print "This is an example command-line maintenance script.\n"; + +$dbr =& wfGetDB( DB_SLAVE ); +$page = $dbr->tableName( 'page' ); +$res = $dbr->query( "SELECT MAX(page_id) as m FROM $page" ); +$row = $dbr->fetchObject( $res ); +print "Max page_id: {$row->m}\n"; + +?> diff --git a/maintenance/update.php b/maintenance/update.php new file mode 100644 index 00000000..8643aa79 --- /dev/null +++ b/maintenance/update.php @@ -0,0 +1,71 @@ +<?php +require_once 'counter.php'; +/** + * Run all updaters. + * + * @todo document + * @package MediaWiki + * @subpackage Maintenance + */ + +/** */ +$wgUseMasterForMaintenance = true; +$options = array( 'quick' ); +require_once( "commandLine.inc" ); +require_once( "updaters.inc" ); +$wgTitle = Title::newFromText( "MediaWiki database updater" ); +$dbclass = 'Database' . 
ucfirst( $wgDBtype ) ; +require_once("$dbclass.php"); +$dbc = new $dbclass; + +echo( "MediaWiki {$wgVersion} Updater\n\n" ); + +# Do a pre-emptive check to ensure we've got credentials supplied +# We can't, at this stage, check them, but we can detect their absence, +# which seems to cause most of the problems people whinge about +if( !isset( $wgDBadminuser ) || !isset( $wgDBadminpassword ) ) { + echo( "No superuser credentials could be found. Please provide the details\n" ); + echo( "of a user with appropriate permissions to update the database. See\n" ); + echo( "AdminSettings.sample for more details.\n\n" ); + exit(); +} + +# Attempt to connect to the database as a privileged user +# This will vomit up an error if there are permissions problems +$wgDatabase = $dbc->newFromParams( $wgDBserver, $wgDBadminuser, $wgDBadminpassword, $wgDBname, 1 ); + +if( !$wgDatabase->isOpen() ) { + # Appears to have failed + echo( "A connection to the database could not be established. Check the\n" ); + # Let's be a bit clever and guess at what's wrong + if( isset( $wgDBadminuser ) && isset( $wgDBadminpassword ) ) { + # Tell the user the value(s) are wrong + echo( 'values of $wgDBadminuser and $wgDBadminpassword.' . "\n" ); + } + exit(); +} + +print "Going to run database updates for $wgDBname\n"; +print "Depending on the size of your database this may take a while!\n"; + +if( !isset( $options['quick'] ) ) { + print "Abort with control-c in the next five seconds... 
"; + + for ($i = 6; $i >= 1;) { + print_c($i, --$i); + sleep(1); + } + echo "\n"; +} + +if ( isset( $options['doshared'] ) ) { + $doShared = true; +} else { + $doShared = false; +} + +do_all_updates( $doShared ); + +print "Done.\n"; + +?> diff --git a/maintenance/updateArticleCount.inc.php b/maintenance/updateArticleCount.inc.php new file mode 100644 index 00000000..20546a78 --- /dev/null +++ b/maintenance/updateArticleCount.inc.php @@ -0,0 +1,68 @@ +<?php + +/** + * Support class for the updateArticleCount.php maintenance script + * + * @package MediaWiki + * @subpackage Maintenance + * @author Rob Church <robchur@gmail.com> + */ + +class ArticleCounter { + + var $dbr; + var $namespaces; + + function ArticleCounter() { + global $wgContentNamespaces; + $this->namespaces = $wgContentNamespaces; + $this->dbr =& wfGetDB( DB_SLAVE ); + } + + /** + * Produce a comma-delimited set of namespaces + * Includes paranoia: every entry is forced through intval() + * + * @return string + */ + function makeNsSet() { + foreach( $this->namespaces as $namespace ) + $namespaces[] = intval( $namespace ); + return implode( ', ', $namespaces ); + } + + /** + * Produce SQL for the query; each matching page is counted once, however many pagelinks rows it joins to + * + * @return string + */ + function makeSql() { + extract( $this->dbr->tableNames( 'page', 'pagelinks' ) ); + $nsset = $this->makeNsSet(); + return "SELECT COUNT(DISTINCT page_id) AS count FROM {$page} + LEFT JOIN {$pagelinks} ON pl_from = page_id + WHERE page_namespace IN ( $nsset ) + AND page_is_redirect = 0 + AND page_len > 0 + AND pl_namespace IS NOT NULL"; + } + + /** + * Count the number of valid content pages in the wiki + * + * @return mixed Integer, or false if there's a problem + */ + function count() { + $res = $this->dbr->query( $this->makeSql(), __METHOD__ ); + if( $res ) { + $row = $this->dbr->fetchObject( $res ); + $this->dbr->freeResult( $res ); + return (int)$row->count; + } else { + return false; # Look out for this when handling the result + } + } + +} + +?>
\ No newline at end of file diff --git a/maintenance/updateArticleCount.php b/maintenance/updateArticleCount.php new file mode 100644 index 00000000..112274d2 --- /dev/null +++ b/maintenance/updateArticleCount.php @@ -0,0 +1,42 @@ +<?php + +/** + * Maintenance script to provide a better count of the number of articles + * and update the site statistics table, if desired + * + * @package MediaWiki + * @subpackage Maintenance + * @author Rob Church <robchur@gmail.com> + */ + +$options = array( 'update', 'help' ); +require_once( 'commandLine.inc' ); +require_once( 'updateArticleCount.inc.php' ); +echo( "Update Article Count\n\n" ); + +if( isset( $options['help'] ) && $options['help'] ) { + echo( "Usage: php updateArticleCount.php [--update]\n\n" ); + echo( "--update : Update site statistics table\n" ); + exit( 0 ); +} + +echo( "Counting articles..." ); +$counter = new ArticleCounter(); +$result = $counter->count(); + +if( $result !== false ) { + echo( "found {$result}.\n" ); + if( isset( $options['update'] ) && $options['update'] ) { + echo( "Updating site statistics table... " ); + $dbw =& wfGetDB( DB_MASTER ); + $dbw->update( 'site_stats', array( 'ss_good_articles' => $result ), array( 'ss_row_id' => 1 ), __METHOD__ ); + echo( "done.\n" ); + } else { + echo( "To update the site statistics table, run the script with the --update option.\n" ); + } +} else { + echo( "failed.\n" ); +} +echo( "\n" ); + +?>
\ No newline at end of file diff --git a/maintenance/updateSearchIndex.inc b/maintenance/updateSearchIndex.inc new file mode 100644 index 00000000..ed01575c --- /dev/null +++ b/maintenance/updateSearchIndex.inc @@ -0,0 +1,115 @@ +<?php +/** + * @package MediaWiki + * @subpackage Maintenance + */ + +/** */ +function updateSearchIndex( $start, $end, $maxLockTime, $quiet ) { + global $wgQuiet; + global $wgDisableSearchUpdate; + + $fname = "updateSearchIndex"; + + $wgQuiet = $quiet; + $wgDisableSearchUpdate = false; + + $dbw =& wfGetDB( DB_MASTER ); + $recentchanges = $dbw->tableName( 'recentchanges' ); + + output( "Updating searchindex between $start and $end\n" ); + + # Select entries from recentchanges which are on top and between the specified times + $start = $dbw->strencode( $start ); + $end = $dbw->strencode( $end ); + + $page = $dbw->tableName( 'page' ); + $sql = "SELECT rc_cur_id,rc_type,rc_moved_to_ns,rc_moved_to_title FROM $recentchanges + JOIN $page ON rc_cur_id=page_id AND rc_this_oldid=page_latest + WHERE rc_timestamp BETWEEN '$start' AND '$end' + "; + $res = $dbw->query( $sql, $fname ); + + + # Lock searchindex + if ( $maxLockTime ) { + output( " --- Waiting for lock ---" ); + lockSearchindex( $dbw ); + $lockTime = time(); + output( "\n" ); + } + + # Loop through the results and do a search update + while ( $row = $dbw->fetchObject( $res ) ) { + # Allow reads to be processed + if ( $maxLockTime && time() > $lockTime + $maxLockTime ) { + output( " --- Relocking ---" ); + relockSearchindex( $dbw ); + $lockTime = time(); + output( "\n" ); + } + if ( $row->rc_type == RC_LOG ) { + continue; + } elseif ( $row->rc_type == RC_MOVE || $row->rc_type == RC_MOVE_OVER_REDIRECT ) { + # Rename searchindex entry + $titleObj = Title::makeTitle( $row->rc_moved_to_ns, $row->rc_moved_to_title ); + $title = $titleObj->getPrefixedDBkey(); + output( "$title..." 
); + $u = new SearchUpdate( $row->rc_cur_id, $title, false ); $u->doUpdate(); + output( "\n" ); + } else { + // Get current revision + $rev = Revision::loadFromPageId( $dbw, $row->rc_cur_id ); + if( $rev ) { + $titleObj = $rev->getTitle(); + $title = $titleObj->getPrefixedDBkey(); + output( $title ); + # Update searchindex + $u = new SearchUpdate( $row->rc_cur_id, $titleObj->getText(), $rev->getText() ); + $u->doUpdate(); + output( "\n" ); + } + } + } + + # Unlock searchindex + if ( $maxLockTime ) { + unlockSearchindex( $dbw ); + } + output( "Done\n" ); +} + +function lockSearchindex( &$db ) { + $write = array( 'searchindex' ); + $read = array( 'page', 'revision', 'text', 'interwiki' ); + $items = array(); + + foreach( $write as $table ) { + $items[] = $db->tableName( $table ) . ' LOW_PRIORITY WRITE'; + } + foreach( $read as $table ) { + $items[] = $db->tableName( $table ) . ' READ'; + } + $sql = "LOCK TABLES " . implode( ',', $items ); + $db->query( $sql ); +} + +function unlockSearchindex( &$db ) { + $db->query( "UNLOCK TABLES" ); +} + +# Unlock and lock again +# Since the lock is low-priority, queued reads will be able to complete +function relockSearchindex( &$db ) { + unlockSearchindex( $db ); + lockSearchindex( $db ); +} + +function output( $text ) { + global $wgQuiet; + if ( !$wgQuiet ) { + print $text; + } +} + +?> diff --git a/maintenance/updateSearchIndex.php b/maintenance/updateSearchIndex.php new file mode 100644 index 00000000..b03dc00d --- /dev/null +++ b/maintenance/updateSearchIndex.php @@ -0,0 +1,57 @@ +<?php +/** + * Script for periodic off-peak updating of the search index + * + * Usage: php updateSearchIndex.php [-s START] [-e END] [-p POSFILE] [-l LOCKTIME] [-q] + * Where START is the starting timestamp + * END is the ending timestamp + * POSFILE is a file to load timestamps from and save them to, searchUpdate.pos by default + * LOCKTIME is how long, in seconds, the searchindex table (write) and the page, revision, text and interwiki tables (read) stay locked before the lock is released and re-acquired + * -q means quiet + * + * @package MediaWiki + * @subpackage 
Maintenance + */ + +/** Options that are followed by a value: START, END, POSFILE and LOCKTIME */ +$optionsWithArgs = array( 's', 'e', 'p', 'l' ); + +require_once( 'commandLine.inc' ); +require_once( 'updateSearchIndex.inc' ); + +if ( isset( $options['p'] ) ) { + $posFile = $options['p']; +} else { + $posFile = 'searchUpdate.pos'; +} + +if ( isset( $options['e'] ) ) { + $end = $options['e']; +} else { + $end = wfTimestampNow(); +} + +if ( isset( $options['s'] ) ) { + $start = $options['s']; +} else { + $start = @file_get_contents( $posFile ); + if ( !$start ) { + $start = wfTimestamp( TS_MW, time() - 86400 ); + } +} + +if ( isset( $options['l'] ) ) { + $lockTime = $options['l']; +} else { + $lockTime = 20; +} + +$quiet = (bool)(@$options['q']); + +updateSearchIndex( $start, $end, $lockTime, $quiet ); + +$file = fopen( $posFile, 'w' ); +fwrite( $file, $end ); +fclose( $file ); + +?> diff --git a/maintenance/updateSpecialPages.php b/maintenance/updateSpecialPages.php new file mode 100644 index 00000000..71c688fc --- /dev/null +++ b/maintenance/updateSpecialPages.php @@ -0,0 +1,96 @@ +<?php + +# Run this script periodically if you have miser mode enabled, to refresh the caches +$options = array('only','help'); + +require_once( 'commandLine.inc' ); + +require_once( 'SpecialPage.php' ); +require_once( 'QueryPage.php' ); + +if(@$options['help']) { + print "usage:updateSpecialPages.php [--help] [--only=page]\n"; + print " --help : this help message\n"; + print " --list : list special pages names\n"; + print " --only=page : only update 'page'. 
Ex: --only=BrokenRedirects\n"; + wfDie(); +} + +$wgOut->disable(); +$dbw =& wfGetDB( DB_MASTER ); + +foreach ( $wgQueryPages as $page ) { + @list( $class, $special, $limit ) = $page; + + # --list : just show the name of pages + if( @$options['list'] ) { + print "$special\n"; + continue; + } + + $specialObj = SpecialPage::getPage( $special ); + if ( !$specialObj ) { + print "No such special page: $special\n"; + exit; + } + $file = $specialObj->getFile(); + if ( $file ) { + require_once( $file ); + } + $queryPage = new $class; + + if( !(isset($options['only'])) or ($options['only'] == $queryPage->getName()) ) { + printf( '%-30s', $special ); + + if ( $queryPage->isExpensive() ) { + $t1 = explode( ' ', microtime() ); + # Do the query + $num = $queryPage->recache( $limit === null ? 1000 : $limit ); + $t2 = explode( ' ', microtime() ); + + if ( $num === false ) { + print "FAILED: database error\n"; + } else { + print "got $num rows in "; + + $elapsed = ($t2[0] - $t1[0]) + ($t2[1] - $t1[1]); + $hours = intval( $elapsed / 3600 ); + $minutes = intval( $elapsed % 3600 / 60 ); + $seconds = $elapsed - $hours * 3600 - $minutes * 60; + if ( $hours ) { + print $hours . 'h '; + } + if ( $minutes ) { + print $minutes . 
'm '; + } + printf( "%.2fs\n", $seconds ); + } + + # Reopen any connections that have closed + if ( !$wgLoadBalancer->pingAll()) { + print "\n"; + do { + print "Connection failed, reconnecting in 10 seconds...\n"; + sleep(10); + } while ( !$wgLoadBalancer->pingAll() ); + print "Reconnected\n\n"; + } else { + # Commit the results + $dbw->immediateCommit(); + } + + # Wait for the slave to catch up + $slaveDB =& wfGetDB( DB_SLAVE, array('QueryPage::recache', 'vslow' ) ); + while( $slaveDB->getLag() > 600 ) { + print "Slave lagged, waiting...\n"; + sleep(30); + + } + + } else { + print "cheap, skipped\n"; + } + } +} + +?> diff --git a/maintenance/updaters.inc b/maintenance/updaters.inc new file mode 100644 index 00000000..164a00cf --- /dev/null +++ b/maintenance/updaters.inc @@ -0,0 +1,835 @@ +<?php +/** + * @package MediaWiki + * @subpackage Maintenance + */ + + /** */ + +require_once 'convertLinks.inc'; +require_once 'InitialiseMessages.inc'; +require_once 'userDupes.inc'; + +$wgRenamedTables = array( +# from to patch file +# array( 'group', 'groups', 'patch-rename-group.sql' ), +); + +$wgNewTables = array( +# table patch file (in maintenance/archives) + array( 'hitcounter', 'patch-hitcounter.sql' ), + array( 'querycache', 'patch-querycache.sql' ), + array( 'objectcache', 'patch-objectcache.sql' ), + array( 'categorylinks', 'patch-categorylinks.sql' ), + array( 'logging', 'patch-logging.sql' ), + array( 'user_newtalk', 'patch-usernewtalk2.sql' ), + array( 'transcache', 'patch-transcache.sql' ), + array( 'trackbacks', 'patch-trackbacks.sql' ), + array( 'externallinks', 'patch-externallinks.sql' ), + array( 'job', 'patch-job.sql' ), + array( 'langlinks', 'patch-langlinks.sql' ), + array( 'querycache_info', 'patch-querycacheinfo.sql' ), + array( 'filearchive', 'patch-filearchive.sql' ), +); + +$wgNewFields = array( +# table field patch file (in maintenance/archives) + array( 'ipblocks', 'ipb_id', 'patch-ipblocks.sql' ), + array( 'ipblocks', 'ipb_expiry', 
'patch-ipb_expiry.sql' ), + array( 'recentchanges', 'rc_type', 'patch-rc_type.sql' ), + array( 'recentchanges', 'rc_ip', 'patch-rc_ip.sql' ), + array( 'recentchanges', 'rc_id', 'patch-rc_id.sql' ), + array( 'recentchanges', 'rc_patrolled', 'patch-rc-patrol.sql' ), + array( 'user', 'user_real_name', 'patch-user-realname.sql' ), + array( 'user', 'user_token', 'patch-user_token.sql' ), + array( 'user', 'user_email_token', 'patch-user_email_token.sql' ), + array( 'user', 'user_registration','patch-user_registration.sql' ), + array( 'logging', 'log_params', 'patch-log_params.sql' ), + array( 'archive', 'ar_rev_id', 'patch-archive-rev_id.sql' ), + array( 'archive', 'ar_text_id', 'patch-archive-text_id.sql' ), + array( 'page', 'page_len', 'patch-page_len.sql' ), + array( 'revision', 'rev_deleted', 'patch-rev_deleted.sql' ), + array( 'image', 'img_width', 'patch-img_width.sql' ), + array( 'image', 'img_metadata', 'patch-img_metadata.sql' ), + array( 'image', 'img_media_type', 'patch-img_media_type.sql' ), + array( 'site_stats', 'ss_total_pages', 'patch-ss_total_articles.sql' ), + array( 'interwiki', 'iw_trans', 'patch-interwiki-trans.sql' ), + array( 'ipblocks', 'ipb_range_start', 'patch-ipb_range_start.sql' ), + array( 'site_stats', 'ss_images', 'patch-ss_images.sql' ), +); + +function rename_table( $from, $to, $patch ) { + global $wgDatabase; + if ( $wgDatabase->tableExists( $from ) ) { + if ( $wgDatabase->tableExists( $to ) ) { + echo "...can't move table $from to $to, $to already exists.\n"; + } else { + echo "Moving table $from to $to..."; + dbsource( archive($patch), $wgDatabase ); + echo "ok\n"; + } + } else { + // Source table does not exist + // Renames are done before creations, so this is typical for a new installation + // Ignore silently + } +} + +function add_table( $name, $patch ) { + global $wgDatabase; + if ( $wgDatabase->tableExists( $name ) ) { + echo "...$name table already exists.\n"; + } else { + echo "Creating $name table..."; + dbsource( 
archive($patch), $wgDatabase ); + echo "ok\n"; + } +} + +function add_field( $table, $field, $patch ) { + global $wgDatabase; + if ( !$wgDatabase->tableExists( $table ) ) { + echo "...$table table does not exist, skipping new field patch\n"; + } elseif ( $wgDatabase->fieldExists( $table, $field ) ) { + echo "...have $field field in $table table.\n"; + } else { + echo "Adding $field field to table $table..."; + dbsource( archive($patch) , $wgDatabase ); + echo "ok\n"; + } +} + +function do_revision_updates() { + global $wgSoftwareRevision; + if ( $wgSoftwareRevision < 1001 ) { + update_passwords(); + } +} + +function update_passwords() { + wfDebugDieBacktrace( "This function needs to be updated or removed.\n" ); + + global $wgDatabase; + $fname = "Update script: update_passwords()"; + print "\nIt appears that you need to update the user passwords in your\n" . + "database. If you have already done this (if you've run this update\n" . + "script once before, for example), doing so again will make all your\n" . + "user accounts inaccessible, so be sure you only do this once.\n" . + "Update user passwords? (yes/no)"; + + $resp = readconsole(); + if ( ! ( "Y" == $resp{0} || "y" == $resp{0} ) ) { return; } + + $sql = "SELECT user_id,user_password FROM user"; + $source = $wgDatabase->query( $sql, $fname ); + + while ( $row = $wgDatabase->fetchObject( $source ) ) { + $id = $row->user_id; + $oldpass = $row->user_password; + $newpass = md5( "{$id}-{$oldpass}" ); + + $sql = "UPDATE user SET user_password='{$newpass}' " . 
+ "WHERE user_id={$id}"; + $wgDatabase->query( $sql, $fname ); + } +} + +function do_interwiki_update() { + # Check that interwiki table exists; if it doesn't source it + global $wgDatabase, $IP; + if( $wgDatabase->tableExists( "interwiki" ) ) { + echo "...already have interwiki table\n"; + return true; + } + echo "Creating interwiki table: "; + dbsource( archive("patch-interwiki.sql") ); + echo "ok\n"; + echo "Adding default interwiki definitions: "; + dbsource( "$IP/maintenance/interwiki.sql" ); + echo "ok\n"; +} + +function do_index_update() { + # Check that proper indexes are in place + global $wgDatabase; + $meta = $wgDatabase->fieldInfo( "recentchanges", "rc_timestamp" ); + if( $meta->multiple_key == 0 ) { + echo "Updating indexes to 20031107: "; + dbsource( archive("patch-indexes.sql") ); + echo "ok\n"; + return true; + } + echo "...indexes seem up to 20031107 standards\n"; + return false; +} + +function do_image_index_update() { + global $wgDatabase; + + $meta = $wgDatabase->fieldInfo( "image", "img_major_mime" ); + if( $meta->multiple_key == 0 ) { + echo "Updating indexes to 20050912: "; + dbsource( archive("patch-mimesearch-indexes.sql") ); + echo "ok\n"; + return true; + } + echo "...indexes seem up to 20050912 standards\n"; + return false; +} + +function do_image_name_unique_update() { + global $wgDatabase; + if( $wgDatabase->indexExists( 'image', 'PRIMARY' ) ) { + echo "...image primary key already set.\n"; + } else { + echo "Making img_name the primary key... "; + dbsource( archive("patch-image_name_primary.sql"), $wgDatabase ); + echo "ok\n"; + } +} + +function do_logging_timestamp_index() { + global $wgDatabase; + if( $wgDatabase->indexExists( 'logging', 'times' ) ) { + echo "...timestamp key on logging already exists.\n"; + } else { + echo "Adding timestamp key on logging table... 
"; + dbsource( archive("patch-logging-times-index.sql"), $wgDatabase ); + echo "ok\n"; + } +} + + +function do_watchlist_update() { + global $wgDatabase; + $fname = 'do_watchlist_update'; + if( $wgDatabase->fieldExists( 'watchlist', 'wl_notificationtimestamp' ) ) { + echo "The watchlist table is already set up for email notification.\n"; + } else { + echo "Adding wl_notificationtimestamp field for email notification management."; + /* ALTER TABLE watchlist ADD (wl_notificationtimestamp varchar(14) binary NOT NULL default '0'); */ + dbsource( archive( 'patch-email-notification.sql' ), $wgDatabase ); + echo "ok\n"; + } + # Check if we need to add talk page rows to the watchlist + $talk = $wgDatabase->selectField( 'watchlist', 'count(*)', 'wl_namespace & 1', $fname ); + $nontalk = $wgDatabase->selectField( 'watchlist', 'count(*)', 'NOT (wl_namespace & 1)', $fname ); + if ( $talk != $nontalk ) { + echo "Adding missing watchlist talk page rows... "; + flush(); + + $wgDatabase->insertSelect( 'watchlist', 'watchlist', + array( + 'wl_user' => 'wl_user', + 'wl_namespace' => 'wl_namespace | 1', + 'wl_title' => 'wl_title', + 'wl_notificationtimestamp' => 'wl_notificationtimestamp' + ), array( 'NOT (wl_namespace & 1)' ), $fname, 'IGNORE' ); + echo "ok\n"; + } else { + echo "...watchlist talk page rows already present\n"; + } +} + +function do_copy_newtalk_to_watchlist() { + global $wgDatabase; + global $wgCommandLineMode; # this needs to be saved while getID() and getName() are called + + $res = $wgDatabase->safeQuery( 'SELECT user_id, user_ip FROM !', + $wgDatabase->tableName( 'user_newtalk' ) ); + $num_newtalks=$wgDatabase->numRows($res); + echo "Now converting ".$num_newtalks." user_newtalk entries to watchlist table entries ... \n"; + + $user = new User(); + for ( $i = 1; $i <= $num_newtalks; $i++ ) { + $wluser = $wgDatabase->fetchObject( $res ); + if ($wluser->user_id == 0) { # anonymous users ... 
have IP numbers as "names" + if ($user->isIP($wluser->user_ip)) { # do only if it really looks like an IP number (double checked) + $wgDatabase->replace( 'watchlist', + array(array('wl_user','wl_namespace', 'wl_title', 'wl_notificationtimestamp' )), + array('wl_user' => 0, + 'wl_namespace' => NS_USER_TALK, + 'wl_title' => $wluser->user_ip, + 'wl_notificationtimestamp' => '19700101000000' + ), 'updaters.inc::do_watchlist_update2' + ); + } + } else { # normal users ... have user_ids + $user->setID($wluser->user_id); + $wgDatabase->replace( 'watchlist', + array(array('wl_user','wl_namespace', 'wl_title', 'wl_notificationtimestamp' )), + array('wl_user' => $user->getID(), + 'wl_namespace' => NS_USER_TALK, + 'wl_title' => $user->getName(), + 'wl_notificationtimestamp' => '19700101000000' + ), 'updaters.inc::do_watchlist_update3' + ); + } + } + echo "Done.\n"; +} + + +function do_user_update() { + global $wgDatabase; + if( $wgDatabase->fieldExists( 'user', 'user_emailauthenticationtimestamp' ) ) { + echo "User table contains old email authentication field. Dropping... "; + dbsource( archive( 'patch-email-authentication.sql' ), $wgDatabase ); + echo "ok\n"; + } else { + echo "...user table does not contain old email authentication field.\n"; + } +} + +/** + * 1.4 betas were missing the 'binary' marker from logging.log_title, + * which causes a collation mismatch error on joins in MySQL 4.1. + */ +function do_logging_encoding() { + global $wgDatabase, $wgDBtype; + if ($wgDBtype != 'mysql') + return; + $logging = $wgDatabase->tableName( 'logging' ); + $res = $wgDatabase->query( "SELECT log_title FROM $logging LIMIT 0" ); + $flags = explode( ' ', mysql_field_flags( $res, 0 ) ); + $wgDatabase->freeResult( $res ); + + if( in_array( 'binary', $flags ) ) { + echo "Logging table has correct title encoding.\n"; + } else { + echo "Fixing title encoding on logging table... 
"; + dbsource( archive( 'patch-logging-title.sql' ), $wgDatabase ); + echo "ok\n"; + } +} + +function do_schema_restructuring() { + global $wgDatabase; + $fname="do_schema_restructuring"; + if ( $wgDatabase->tableExists( 'page' ) ) { + echo "...page table already exists.\n"; + } else { + echo "...converting from cur/old to page/revision/text DB structure.\n"; flush(); + echo wfTimestamp(); + echo "......checking for duplicate entries.\n"; flush(); + + extract( $wgDatabase->tableNames( 'cur', 'old', 'page', 'revision', 'text' ) ); + + $rows = $wgDatabase->query( "SELECT cur_title, cur_namespace, COUNT(cur_namespace) AS c + FROM $cur GROUP BY cur_title, cur_namespace HAVING c>1", $fname ); + + if ( $wgDatabase->numRows( $rows ) > 0 ) { + echo wfTimestamp(); + echo "......<b>Found duplicate entries</b>\n"; + echo ( sprintf( "<b> %-60s %3s %5s</b>\n", 'Title', 'NS', 'Count' ) ); + while ( $row = $wgDatabase->fetchObject( $rows ) ) { + if ( ! isset( $duplicate[$row->cur_namespace] ) ) { + $duplicate[$row->cur_namespace] = array(); + } + $duplicate[$row->cur_namespace][] = $row->cur_title; + echo ( sprintf( " %-60s %3s %5s\n", $row->cur_title, $row->cur_namespace, $row->c ) ); + } + $sql = "SELECT cur_title, cur_namespace, cur_id, cur_timestamp FROM $cur WHERE "; + $firstCond = true; + foreach ( $duplicate as $ns => $titles ) { + if ( $firstCond ) { + $firstCond = false; + } else { + $sql .= ' OR '; + } + $sql .= "( cur_namespace = {$ns} AND cur_title in ("; + $first = true; + foreach ( $titles as $t ) { + if ( $first ) { + $sql .= $wgDatabase->addQuotes( $t ); + $first = false; + } else { + $sql .= ', ' . $wgDatabase->addQuotes( $t ); + } + } + $sql .= ") ) \n"; + } + # By sorting descending, the most recent entry will be the first in the list. + # All following entries will be deleted by the next while-loop. 
+ $sql .= 'ORDER BY cur_namespace, cur_title, cur_timestamp DESC'; + + $rows = $wgDatabase->query( $sql, $fname ); + + $prev_title = $prev_namespace = false; + $deleteId = array(); + + while ( $row = $wgDatabase->fetchObject( $rows ) ) { + if ( $prev_title == $row->cur_title && $prev_namespace == $row->cur_namespace ) { + $deleteId[] = $row->cur_id; + } + $prev_title = $row->cur_title; + $prev_namespace = $row->cur_namespace; + } + $sql = "DELETE FROM $cur WHERE cur_id IN ( " . join( ',', $deleteId ) . ')'; + $rows = $wgDatabase->query( $sql, $fname ); + echo wfTimestamp(); + echo "......<b>Deleted</b> ".$wgDatabase->affectedRows()." records.\n"; + } + + + echo wfTimestamp(); + echo "......Creating tables.\n"; + $wgDatabase->query("CREATE TABLE $page ( + page_id int(8) unsigned NOT NULL auto_increment, + page_namespace int NOT NULL, + page_title varchar(255) binary NOT NULL, + page_restrictions tinyblob NOT NULL default '', + page_counter bigint(20) unsigned NOT NULL default '0', + page_is_redirect tinyint(1) unsigned NOT NULL default '0', + page_is_new tinyint(1) unsigned NOT NULL default '0', + page_random real unsigned NOT NULL, + page_touched char(14) binary NOT NULL default '', + page_latest int(8) unsigned NOT NULL, + page_len int(8) unsigned NOT NULL, + + PRIMARY KEY page_id (page_id), + UNIQUE INDEX name_title (page_namespace,page_title), + INDEX (page_random), + INDEX (page_len) + ) TYPE=InnoDB", $fname ); + $wgDatabase->query("CREATE TABLE $revision ( + rev_id int(8) unsigned NOT NULL auto_increment, + rev_page int(8) unsigned NOT NULL, + rev_comment tinyblob NOT NULL default '', + rev_user int(5) unsigned NOT NULL default '0', + rev_user_text varchar(255) binary NOT NULL default '', + rev_timestamp char(14) binary NOT NULL default '', + rev_minor_edit tinyint(1) unsigned NOT NULL default '0', + rev_deleted tinyint(1) unsigned NOT NULL default '0', + + PRIMARY KEY rev_page_id (rev_page, rev_id), + UNIQUE INDEX rev_id (rev_id), + INDEX rev_timestamp 
(rev_timestamp), + INDEX page_timestamp (rev_page,rev_timestamp), + INDEX user_timestamp (rev_user,rev_timestamp), + INDEX usertext_timestamp (rev_user_text,rev_timestamp) + ) TYPE=InnoDB", $fname ); + + echo wfTimestamp(); + echo "......Locking tables.\n"; + $wgDatabase->query( "LOCK TABLES $page WRITE, $revision WRITE, $old WRITE, $cur WRITE", $fname ); + + $maxold = intval( $wgDatabase->selectField( 'old', 'max(old_id)', '', $fname ) ); + echo wfTimestamp(); + echo "......maxold is {$maxold}\n"; + + echo wfTimestamp(); + global $wgLegacySchemaConversion; + if( $wgLegacySchemaConversion ) { + // Create HistoryBlobCurStub entries. + // Text will be pulled from the leftover 'cur' table at runtime. + echo "......Moving metadata from cur; using blob references to text in cur table.\n"; + $cur_text = "concat('O:18:\"historyblobcurstub\":1:{s:6:\"mCurId\";i:',cur_id,';}')"; + $cur_flags = "'object'"; + } else { + // Copy all cur text in immediately: this may take longer but avoids + // having to keep an extra table around. 
+ echo "......Moving text from cur.\n"; + $cur_text = 'cur_text'; + $cur_flags = "''"; + } + $wgDatabase->query( "INSERT INTO $old (old_namespace, old_title, old_text, old_comment, old_user, old_user_text, + old_timestamp, old_minor_edit, old_flags) + SELECT cur_namespace, cur_title, $cur_text, cur_comment, cur_user, cur_user_text, cur_timestamp, cur_minor_edit, $cur_flags + FROM $cur", $fname ); + + echo wfTimestamp(); + echo "......Setting up revision table.\n"; + $wgDatabase->query( "INSERT INTO $revision (rev_id, rev_page, rev_comment, rev_user, rev_user_text, rev_timestamp, + rev_minor_edit) + SELECT old_id, cur_id, old_comment, old_user, old_user_text, + old_timestamp, old_minor_edit + FROM $old,$cur WHERE old_namespace=cur_namespace AND old_title=cur_title", $fname ); + + echo wfTimestamp(); + echo "......Setting up page table.\n"; + $wgDatabase->query( "INSERT INTO $page (page_id, page_namespace, page_title, page_restrictions, page_counter, + page_is_redirect, page_is_new, page_random, page_touched, page_latest, page_len) + SELECT cur_id, cur_namespace, cur_title, cur_restrictions, cur_counter, cur_is_redirect, cur_is_new, + cur_random, cur_touched, rev_id, LENGTH(cur_text) + FROM $cur,$revision + WHERE cur_id=rev_page AND rev_timestamp=cur_timestamp AND rev_id > {$maxold}", $fname ); + + echo wfTimestamp(); + echo "......Unlocking tables.\n"; + $wgDatabase->query( "UNLOCK TABLES", $fname ); + + echo wfTimestamp(); + echo "......Renaming old.\n"; + $wgDatabase->query( "ALTER TABLE $old RENAME TO $text", $fname ); + + echo wfTimestamp(); + echo "...done.\n"; + } +} + +function do_inverse_timestamp() { + global $wgDatabase; + $fname="do_schema_restructuring"; + if( $wgDatabase->fieldExists( 'revision', 'inverse_timestamp' ) ) { + echo "Removing revision.inverse_timestamp and fixing indexes... 
"; + dbsource( archive( 'patch-inverse_timestamp.sql' ), $wgDatabase ); + echo "ok\n"; + } else { + echo "revision timestamp indexes already up to 2005-03-13\n"; + } +} + +function do_text_id() { + global $wgDatabase; + if( $wgDatabase->fieldExists( 'revision', 'rev_text_id' ) ) { + echo "...rev_text_id already in place.\n"; + } else { + echo "Adding rev_text_id field... "; + dbsource( archive( 'patch-rev_text_id.sql' ), $wgDatabase ); + echo "ok\n"; + } +} + +function do_namespace_size() { + $tables = array( + 'page' => 'page', + 'archive' => 'ar', + 'recentchanges' => 'rc', + 'watchlist' => 'wl', + 'querycache' => 'qc', + 'logging' => 'log', + ); + foreach( $tables as $table => $prefix ) { + do_namespace_size_on( $table, $prefix ); + flush(); + } +} + +function do_namespace_size_on( $table, $prefix ) { + global $wgDatabase, $wgDBtype; + if ($wgDBtype != 'mysql') + return; + $field = $prefix . '_namespace'; + + $tablename = $wgDatabase->tableName( $table ); + $result = $wgDatabase->query( "SHOW COLUMNS FROM $tablename LIKE '$field'" ); + $info = $wgDatabase->fetchObject( $result ); + $wgDatabase->freeResult( $result ); + + if( substr( $info->Type, 0, 3 ) == 'int' ) { + echo "...$field is already a full int ($info->Type).\n"; + } else { + echo "Promoting $field from $info->Type to int... "; + + $sql = "ALTER TABLE $tablename MODIFY $field int NOT NULL"; + $wgDatabase->query( $sql ); + + echo "ok\n"; + } +} + +function do_pagelinks_update() { + global $wgDatabase; + if( $wgDatabase->tableExists( 'pagelinks' ) ) { + echo "...already have pagelinks table.\n"; + } else { + echo "Converting links and brokenlinks tables to pagelinks... 
"; + dbsource( archive( 'patch-pagelinks.sql' ), $wgDatabase ); + echo "ok\n"; + flush(); + + global $wgCanonicalNamespaceNames; + foreach( $wgCanonicalNamespaceNames as $ns => $name ) { + if( $ns != 0 ) { + do_pagelinks_namespace( $ns ); + } + } + } +} + +function do_pagelinks_namespace( $namespace ) { + global $wgDatabase, $wgContLang; + + $ns = intval( $namespace ); + echo "Cleaning up broken links for namespace $ns... "; + + $pagelinks = $wgDatabase->tableName( 'pagelinks' ); + $name = $wgContLang->getNsText( $ns ); + $prefix = $wgDatabase->strencode( $name ); + $likeprefix = str_replace( '_', '\\_', $prefix); + + $sql = "UPDATE $pagelinks + SET pl_namespace=$ns, + pl_title=TRIM(LEADING '$prefix:' FROM pl_title) + WHERE pl_namespace=0 + AND pl_title LIKE '$likeprefix:%'"; + + $wgDatabase->query( $sql, 'do_pagelinks_namespace' ); + echo "ok\n"; +} + +function do_drop_img_type() { + global $wgDatabase; + + if( $wgDatabase->fieldExists( 'image', 'img_type' ) ) { + echo "Dropping unused img_type field in image table... "; + dbsource( archive( 'patch-drop_img_type.sql' ), $wgDatabase ); + echo "ok\n"; + } else { + echo "No img_type field in image table; Good.\n"; + } +} + +function do_old_links_update() { + global $wgDatabase; + if( $wgDatabase->tableExists( 'pagelinks' ) ) { + echo "Already have pagelinks; skipping old links table updates.\n"; + } else { + convertLinks(); flush(); + } +} + +function do_user_unique_update() { + global $wgDatabase; + $duper = new UserDupes( $wgDatabase ); + if( $duper->hasUniqueIndex() ) { + echo "Already have unique user_name index.\n"; + } else { + if( !$duper->clearDupes() ) { + echo "WARNING: This next step will probably fail due to unfixed duplicates...\n"; + } + echo "Adding unique index on user_name... 
"; + dbsource( archive( 'patch-user_nameindex.sql' ), $wgDatabase ); + echo "ok\n"; + } +} + +function do_user_groups_update() { + $fname = 'do_user_groups_update'; + global $wgDatabase; + + if( $wgDatabase->tableExists( 'user_groups' ) ) { + echo "...user_groups table already exists.\n"; + return do_user_groups_reformat(); + } + + echo "Adding user_groups table... "; + dbsource( archive( 'patch-user_groups.sql' ), $wgDatabase ); + echo "ok\n"; + + if( !$wgDatabase->tableExists( 'user_rights' ) ) { + if( $wgDatabase->fieldExists( 'user', 'user_rights' ) ) { + echo "Upgrading from a 1.3 or older database? Breaking out user_rights for conversion..."; + dbsource( archive( 'patch-user_rights.sql' ), $wgDatabase ); + echo "ok\n"; + } else { + echo "*** WARNING: couldn't locate user_rights table or field for upgrade.\n"; + echo "*** You may need to manually configure some sysops by manipulating\n"; + echo "*** the user_groups table.\n"; + return; + } + } + + echo "Converting user_rights table to user_groups... "; + $result = $wgDatabase->select( 'user_rights', + array( 'ur_user', 'ur_rights' ), + array( "ur_rights != ''" ), + $fname ); + + while( $row = $wgDatabase->fetchObject( $result ) ) { + $groups = array_unique( + array_map( 'trim', + explode( ',', $row->ur_rights ) ) ); + + foreach( $groups as $group ) { + $wgDatabase->insert( 'user_groups', + array( + 'ug_user' => $row->ur_user, + 'ug_group' => $group ), + $fname ); + } + } + $wgDatabase->freeResult( $result ); + echo "ok\n"; +} + +function do_user_groups_reformat() { + # Check for bogus formats from previous 1.5 alpha code. + global $wgDatabase; + $info = $wgDatabase->fieldInfo( 'user_groups', 'ug_group' ); + + if( $info->type == 'int' ) { + $oldug = $wgDatabase->tableName( 'user_groups' ); + $newug = $wgDatabase->tableName( 'user_groups_bogus' ); + echo "user_groups is in bogus intermediate format. Renaming to $newug... 
"; + $wgDatabase->query( "ALTER TABLE $oldug RENAME TO $newug" ); + echo "ok\n"; + + echo "Re-adding fresh user_groups table... "; + dbsource( archive( 'patch-user_groups.sql' ), $wgDatabase ); + echo "ok\n"; + + echo "***\n"; + echo "*** WARNING: You will need to manually fix up user permissions in the user_groups\n"; + echo "*** table. Old 1.5 alpha versions did some pretty funky stuff...\n"; + echo "***\n"; + } else { + echo "...user_groups is in current format.\n"; + } + +} + +function do_watchlist_null() { + # Make sure wl_notificationtimestamp can be NULL, + # and update old broken items. + global $wgDatabase; + $info = $wgDatabase->fieldInfo( 'watchlist', 'wl_notificationtimestamp' ); + + if( $info->not_null ) { + echo "Making wl_notificationtimestamp nullable... "; + dbsource( archive( 'patch-watchlist-null.sql' ), $wgDatabase ); + echo "ok\n"; + } else { + echo "...wl_notificationtimestamp is already nullable.\n"; + } + +} + +/** + * @bug 3946 + */ +function do_page_random_update() { + global $wgDatabase; + + echo "Setting page_random to a random value on rows where it equals 0..."; + + $page = $wgDatabase->tableName( 'page' ); + $wgDatabase->query( "UPDATE $page SET page_random = RAND() WHERE page_random = 0", 'do_page_random_update' ); + $rows = $wgDatabase->affectedRows(); + + echo "changed $rows rows\n"; +} + +function do_templatelinks_update() { + global $wgDatabase, $wgLoadBalancer; + $fname = 'do_templatelinks_update'; + + if ( $wgDatabase->tableExists( 'templatelinks' ) ) { + echo "...templatelinks table already exists\n"; + return; + } + echo "Creating templatelinks table...\n"; + dbsource( archive('patch-templatelinks.sql'), $wgDatabase ); + echo "Populating...\n"; + if ( isset( $wgLoadBalancer ) && $wgLoadBalancer->getServerCount() > 1 ) { + // Slow, replication-friendly update + $res = $wgDatabase->select( 'pagelinks', array( 'pl_from', 'pl_namespace', 'pl_title' ), + array( 'pl_namespace' => NS_TEMPLATE ), $fname ); + $count = 0; + while ( 
$row = $wgDatabase->fetchObject( $res ) ) { + $count = ($count + 1) % 100; + if ( $count == 0 ) { + if ( function_exists( 'wfWaitForSlaves' ) ) { + wfWaitForSlaves( 10 ); + } else { + sleep( 1 ); + } + } + $wgDatabase->insert( 'templatelinks', + array( + 'tl_from' => $row->pl_from, + 'tl_namespace' => $row->pl_namespace, + 'tl_title' => $row->pl_title, + ), $fname + ); + + } + $wgDatabase->freeResult( $res ); + } else { + // Fast update + $wgDatabase->insertSelect( 'templatelinks', 'pagelinks', + array( + 'tl_from' => 'pl_from', + 'tl_namespace' => 'pl_namespace', + 'tl_title' => 'pl_title' + ), array( + 'pl_namespace' => 10 + ), $fname + ); + } + echo "Done. Please run maintenance/refreshLinks.php for a more thorough templatelinks update.\n"; +} + +function do_all_updates( $doShared = false ) { + global $wgNewTables, $wgNewFields, $wgRenamedTables, $wgSharedDB, $wgDatabase; + + $doUser = !$wgSharedDB || $doShared; + + # Rename tables + foreach ( $wgRenamedTables as $tableRecord ) { + rename_table( $tableRecord[0], $tableRecord[1], $tableRecord[2] ); + } + + # Add missing tables + foreach ( $wgNewTables as $tableRecord ) { + add_table( $tableRecord[0], $tableRecord[1] ); + flush(); + } + + # Add missing fields + foreach ( $wgNewFields as $fieldRecord ) { + if ( $fieldRecord[0] != 'user' || $doUser ) { + add_field( $fieldRecord[0], $fieldRecord[1], $fieldRecord[2] ); + } + flush(); + } + + # Do schema updates which require special handling + do_interwiki_update(); flush(); + do_index_update(); flush(); + do_old_links_update(); flush(); + do_image_name_unique_update(); flush(); + do_watchlist_update(); flush(); + if ( $doUser ) { + do_user_update(); flush(); + } +###### do_copy_newtalk_to_watchlist(); flush(); + do_logging_encoding(); flush(); + + do_schema_restructuring(); flush(); + do_inverse_timestamp(); flush(); + do_text_id(); flush(); + do_namespace_size(); flush(); + + do_pagelinks_update(); flush(); + do_templatelinks_update(); flush(); // after pagelinks + 
+ do_drop_img_type(); flush(); + + if ( $doUser ) { + do_user_unique_update(); flush(); + } + do_user_groups_update(); flush(); + + do_watchlist_null(); flush(); + + //do_image_index_update(); flush(); + + do_logging_timestamp_index(); flush(); + + do_page_random_update(); flush(); + + initialiseMessages(); flush(); +} + +function archive($name) { + global $wgDBtype, $IP; + switch ($wgDBtype) { + case "oracle": + return "$IP/maintenance/oracle/archives/$name"; + default: + return "$IP/maintenance/archives/$name"; + } +} +?> diff --git a/maintenance/upgrade1_5.php b/maintenance/upgrade1_5.php new file mode 100644 index 00000000..a269c335 --- /dev/null +++ b/maintenance/upgrade1_5.php @@ -0,0 +1,24 @@ +<?php + +// Alternate 1.4 -> 1.5 schema upgrade +// This does only the main tables + UTF-8 +// and is designed to allow upgrades to interleave +// with other updates on the replication stream so +// that large wikis can be upgraded without disrupting +// other services. +// +// Note: this script DOES NOT apply every update, nor +// will it probably handle much older versions, etc. +// Run this, FOLLOWED BY update.php, for upgrading +// from 1.4.5 release to 1.5. + +$options = array( 'step', 'noimages' ); + +require_once( 'commandLine.inc' ); +require_once( 'FiveUpgrade.inc' ); + +$upgrade = new FiveUpgrade(); +$step = isset( $options['step'] ) ? $options['step'] : null; +$upgrade->upgrade( $step ); + +?> diff --git a/maintenance/userDupes.inc b/maintenance/userDupes.inc new file mode 100644 index 00000000..f66051d4 --- /dev/null +++ b/maintenance/userDupes.inc @@ -0,0 +1,328 @@ +<?php +# Copyright (C) 2005 Brion Vibber <brion@pobox.com> +# http://www.mediawiki.org/ +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
# http://www.gnu.org/copyleft/gpl.html

/**
 * Look for duplicate user table entries and optionally prune them.
 *
 * Progress and status information is echo'd to stdout by every step.
 * The three counters ($reassigned, $trimmed, $failed) are reset at the
 * start of each checkDupes() run and updated by examine().
 */
class UserDupes {
	/** Database object all queries are issued through. */
	var $db;
	/** Number of duplicate records found to have edits attributed to them. */
	var $reassigned;
	/** Number of duplicate records that were (or could be) removed. */
	var $trimmed;
	/** Number of duplicate records that still had edits after reassignment. */
	var $failed;

	/**
	 * Constructor.
	 * @param Database $database database object to operate on (stored by reference)
	 */
	function __construct( &$database ) {
		$this->db =& $database;
	}

	/**
	 * Old-style (class-named) constructor, kept so that existing callers and
	 * older PHP versions keep working. Newer PHP versions no longer treat a
	 * class-named method as the constructor, hence __construct() above.
	 * @param Database $database
	 */
	function UserDupes( &$database ) {
		$this->__construct( $database );
	}

	/**
	 * Check if this database's user table has already had a unique
	 * user_name index applied.
	 * @return bool
	 */
	function hasUniqueIndex() {
		$fname = 'UserDupes::hasUniqueIndex';
		$info = $this->db->indexInfo( 'user', 'user_name', $fname );
		if( !$info ) {
			echo "WARNING: doesn't seem to have user_name index at all!\n";
			return false;
		}

		# Note the inverted sense of the flag: 'Non_unique' is 0 for
		# *unique* indexes and 1 for *non-unique* ones.
		return ( $info->Non_unique == 0 );
	}

	/**
	 * Checks the database for duplicate user account records
	 * and remove them in preparation for application of a unique
	 * index on the user_name field. Returns true if the table is
	 * clean or if duplicates have been resolved automatically.
	 *
	 * May return false if there are unresolvable problems.
	 * Status information will be echo'd to stdout.
	 *
	 * @return bool
	 */
	function clearDupes() {
		return $this->checkDupes( true );
	}

	/**
	 * Checks the database for duplicate user account records
	 * in preparation for application of a unique index on the
	 * user_name field. Returns true if the table is clean or
	 * if duplicates can be resolved automatically.
	 *
	 * Returns false if there are duplicates and resolution was
	 * not requested. (If doing resolution, edits may be reassigned.)
	 * Status information will be echo'd to stdout.
	 *
	 * @param bool $doDelete pass true to actually remove things
	 *                       from the database; false to just check.
	 * @return bool
	 */
	function checkDupes( $doDelete = false ) {
		global $wgDBname;

		if( $this->hasUniqueIndex() ) {
			echo "$wgDBname already has a unique index on its user table.\n";
			return true;
		}

		# Hold write locks for the whole scan so the tables cannot change
		# between counting edits and deleting records.
		$this->lock();

		echo "Checking for duplicate accounts...\n";
		$dupes = $this->getDupes();
		$count = count( $dupes );

		echo "Found $count accounts with duplicate records on $wgDBname.\n";
		$this->trimmed = 0;
		$this->reassigned = 0;
		$this->failed = 0;
		foreach( $dupes as $name ) {
			$this->examine( $name, $doDelete );
		}

		$this->unlock();

		echo "\n";

		if( $this->reassigned > 0 ) {
			if( $doDelete ) {
				echo "$this->reassigned duplicate accounts had edits reassigned to a canonical record id.\n";
			} else {
				echo "$this->reassigned duplicate accounts need to have edits reassigned.\n";
			}
		}

		if( $this->trimmed > 0 ) {
			if( $doDelete ) {
				echo "$this->trimmed duplicate user records were deleted from $wgDBname.\n";
			} else {
				echo "$this->trimmed duplicate user accounts were found on $wgDBname which can be removed safely.\n";
			}
		}

		if( $this->failed > 0 ) {
			echo "Something terribly awry; $this->failed duplicate accounts were not removed.\n";
			return false;
		}

		if( $this->trimmed == 0 || $doDelete ) {
			echo "It is now safe to apply the unique index on user_name.\n";
			return true;
		} else {
			echo "Run this script again with the --fix option to automatically delete them.\n";
			return false;
		}
	}

	/**
	 * Take WRITE locks on the user table and on the tables that carry edit
	 * attribution (revision, or cur and old on the pre-1.5 schema).
	 * @access private
	 */
	function lock() {
		$fname = 'UserDupes::lock';
		if( $this->newSchema() ) {
			$set = array( 'user', 'revision' );
		} else {
			$set = array( 'user', 'cur', 'old' );
		}
		$names = array_map( array( $this, 'lockTable' ), $set );
		$tables = implode( ',', $names );

		$this->db->query( "LOCK TABLES $tables", $fname );
	}

	/**
	 * Build the "<table> WRITE" fragment of a LOCK TABLES statement.
	 * @param string $table unprefixed table name
	 * @return string
	 */
	function lockTable( $table ) {
		return $this->db->tableName( $table ) . ' WRITE';
	}

	/**
	 * Decide which schema to work on: the presence of the Revision class
	 * is used as the marker for the page/revision layout.
	 * @return bool
	 * @access private
	 */
	function newSchema() {
		return class_exists( 'Revision' );
	}

	/**
	 * Release the locks taken by lock().
	 * @access private
	 */
	function unlock() {
		$fname = 'UserDupes::unlock';
		$this->db->query( "UNLOCK TABLES", $fname );
	}

	/**
	 * Grab usernames for which multiple records are present in the database.
	 * @return array list of user_name strings
	 * @access private
	 */
	function getDupes() {
		$fname = 'UserDupes::getDupes';
		$user = $this->db->tableName( 'user' );
		$result = $this->db->query(
			"SELECT user_name,COUNT(*) AS n " .
			"FROM $user " .
			"GROUP BY user_name " .
			"HAVING n > 1", $fname );

		$list = array();
		while( $row = $this->db->fetchObject( $result ) ) {
			$list[] = $row->user_name;
		}
		$this->db->freeResult( $result );

		return $list;
	}

	/**
	 * Examine user records for the given name. The first row the query
	 * returns is treated as the record to keep; every further row is a
	 * duplicate that is checked for edits. Duplicates without edits (or
	 * whose edits were successfully reassigned) are counted in $trimmed and,
	 * when $doDelete is set, deleted.
	 *
	 * No ORDER BY is applied, so which record comes first is up to the
	 * database. NOTE(review): presumably this matches the record the login
	 * code would pick -- confirm before relying on it.
	 *
	 * @param string $name
	 * @param bool $doDelete
	 * @access private
	 */
	function examine( $name, $doDelete ) {
		$fname = 'UserDupes::examine';
		$result = $this->db->select( 'user',
			array( 'user_id' ),
			array( 'user_name' => $name ),
			$fname );

		$firstRow = $this->db->fetchObject( $result );
		if( !$firstRow ) {
			# Nothing matched; there is no canonical record and no duplicate
			# to process, so do not dereference the missing row.
			echo "No user records found for '$name'; nothing to do.\n";
			$this->db->freeResult( $result );
			return;
		}
		$firstId = $firstRow->user_id;
		echo "Record that will be used for '$name' is user_id=$firstId\n";

		while( $row = $this->db->fetchObject( $result ) ) {
			$dupeId = $row->user_id;
			echo "... dupe id $dupeId: ";
			$edits = $this->editCount( $dupeId );
			if( $edits > 0 ) {
				$this->reassigned++;
				echo "has $edits edits! ";
				if( $doDelete ) {
					$this->reassignEdits( $dupeId, $firstId );
					# Re-count to verify the reassignment really emptied the
					# duplicate before it is allowed to be deleted.
					$newEdits = $this->editCount( $dupeId );
					if( $newEdits == 0 ) {
						echo "confirmed cleaned. ";
					} else {
						$this->failed++;
						echo "WARNING! $newEdits remaining edits for $dupeId; NOT deleting user.\n";
						continue;
					}
				} else {
					echo "(will need to reassign edits on fix)";
				}
			} else {
				echo "ok, no edits. ";
			}
			$this->trimmed++;
			if( $doDelete ) {
				$this->trimAccount( $dupeId );
			}
			echo "\n";
		}
		$this->db->freeResult( $result );
	}

	/**
	 * Count the number of edits attributed to this user.
	 * Does not currently check log table or other things
	 * where it might show up...
	 * @param int $userid
	 * @return int
	 * @access private
	 */
	function editCount( $userid ) {
		if( $this->newSchema() ) {
			return $this->editCountOn( 'revision', 'rev_user', $userid );
		} else {
			return $this->editCountOn( 'cur', 'cur_user', $userid ) +
				$this->editCountOn( 'old', 'old_user', $userid );
		}
	}

	/**
	 * Count the number of hits on a given table for this account.
	 * @param string $table
	 * @param string $field
	 * @param int $userid
	 * @return int
	 * @access private
	 */
	function editCountOn( $table, $field, $userid ) {
		$fname = 'UserDupes::editCountOn';
		return intval( $this->db->selectField(
			$table,
			'COUNT(*)',
			array( $field => $userid ),
			$fname ) );
	}

	/**
	 * Move all edits attributed to one user id over to another.
	 * @param int $from user id of the duplicate record
	 * @param int $to user id of the record being kept
	 * @access private
	 */
	function reassignEdits( $from, $to ) {
		$set = $this->newSchema()
			? array( 'revision' => 'rev_user' )
			: array( 'cur' => 'cur_user', 'old' => 'old_user' );
		foreach( $set as $table => $field ) {
			$this->reassignEditsOn( $table, $field, $from, $to );
		}
	}

	/**
	 * Rewrite the user id column of one table from $from to $to.
	 * @param string $table
	 * @param string $field
	 * @param int $from
	 * @param int $to
	 * @access private
	 */
	function reassignEditsOn( $table, $field, $from, $to ) {
		$fname = 'UserDupes::reassignEditsOn';
		echo "reassigning on $table... ";
		$this->db->update( $table,
			array( $field => $to ),
			array( $field => $from ),
			$fname );
		echo "ok. ";
	}

	/**
	 * Remove a user account line.
	 * @param int $userid
	 * @access private
	 */
	function trimAccount( $userid ) {
		$fname = 'UserDupes::trimAccount';
		echo "deleting...";
		$this->db->delete( 'user', array( 'user_id' => $userid ), $fname );
		echo " ok";
	}

}


?>
<?php
# Copyright (C) 2005 Brion Vibber <brion@pobox.com>
# http://www.mediawiki.org/
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
# http://www.gnu.org/copyleft/gpl.html

# maintenance/userDupes.php
#
# Command-line front end for the UserDupes class: runs the duplicate-user
# check against the master database and exits with status 0 when
# checkDupes() reports success, -1 otherwise.

# NOTE(review): presumably read by commandLine.inc when it parses the
# command line -- confirm against that file.
$options = array( 'fix' );

/** */
require_once( 'commandLine.inc' );
require_once( 'maintenance/userDupes.inc' );

# Only actually modify the database when the 'fix' option is present.
$fix = isset( $options['fix'] );

$wgTitle = Title::newFromText( 'Dupe user entry cleanup script' );

$dbw =& wfGetDB( DB_MASTER );
$duper = new UserDupes( $dbw );
$retval = $duper->checkDupes( $fix );

if( !$retval ) {
	echo "\nOh noeees\n";
	exit( -1 );
}

echo "\nLooks good!\n";
exit( 0 );

?>
\ No newline at end of file diff --git a/maintenance/users.sql b/maintenance/users.sql new file mode 100644 index 00000000..755bf9f7 --- /dev/null +++ b/maintenance/users.sql @@ -0,0 +1,12 @@ +-- SQL script to create required database users with proper +-- access rights. This is run from the installation script +-- which replaces the password variables with their values +-- from local settings. +-- + +GRANT DELETE,INSERT,SELECT,UPDATE,CREATE TEMPORARY TABLES ON `{$wgDBname}`.* + TO '{$wgDBuser}'@'%' IDENTIFIED BY '{$wgDBpassword}'; +GRANT DELETE,INSERT,SELECT,UPDATE,CREATE TEMPORARY TABLES ON `{$wgDBname}`.* + TO '{$wgDBuser}'@localhost IDENTIFIED BY '{$wgDBpassword}'; +GRANT DELETE,INSERT,SELECT,UPDATE,CREATE TEMPORARY TABLES ON `{$wgDBname}`.* + TO '{$wgDBuser}'@localhost.localdomain IDENTIFIED BY '{$wgDBpassword}'; diff --git a/maintenance/wiki-mangleme.php b/maintenance/wiki-mangleme.php new file mode 100644 index 00000000..6b180257 --- /dev/null +++ b/maintenance/wiki-mangleme.php @@ -0,0 +1,553 @@ +<?php +/** + +Author : Nick Jenkins, http://nickj.org/ +Date : 18 May 2006. +License: GPL v 2. + +Desc: + Performs fuzz-style testing of MediaWiki's parser. + The script feeds the parser some randomized malformed wiki-text, and stores + the HTML output. + + Checks the HTML output for: + - unclosed tags + - errors in Tidy + both can indicate potential security issues. + + Can optionally W3C-validate the HTML output (a failure indicates malformed + HTML output). + +Background: + Contains a PHP port of a "shameless" Python port of lcamtuf's mangleme: + http://www.securiteam.com/tools/6Z00N1PBFK.html + +Requirements: + You need PHP4 or PHP5, with PHP-curl enabled, and Tidy installed. + +Usage: + Update the "Configuration" section, especially the "WIKI_URL" to point + to a local wiki you can test stuff on. You can optionally set + "VALIDATE_ON_WEB" to true, although at the moment very few generated pages + will validate. Then run "php wiki-mangleme.php". 
+ + This will print a list of HTML output that had unclosed tags, and/or that + caused tidy errors. It will keep running until you press Ctrl-C. All output + files are stored in the "mangleme" subdirectory. +*/ + +# This is a command line script, load mediawiki env: +include('commandLine.inc'); + +// Configuration: + +# The directory name where we store the output +# for windows: "c:\\temp\\mangleme" +define("DIRECTORY", "/tmp/mangleme"); + +# URL to some wiki on which we can run our tests: +define("WIKI_URL", $wgServer . $wgScriptPath . '/index.php?title=WIKIMANGLE' ); + +# Should our test output include binary strings? +define("INCLUDE_BINARY", false); + +# Whether we want to send output on the web for validation: +define("VALIDATE_ON_WEB", false); +# URL to use to validate our output: +define("VALIDATOR_URL", "http://validator.w3.org/check"); + + +// If it goes wrong, we want to know about it. +error_reporting(E_ALL); + +///////////////////// DEFINE THE DATA THAT WILL BE USED ////////////////////// +/* Note: Only some HTML tags are understood by MediaWiki, the rest is ignored. + The tags that are ignored have been commented out below. 
*/ + +$data = array(); +// $data["A"] = array("NAME", "HREF", "REF", "REV", "TITLE", "TARGET", "SHAPE", "onLoad", "STYLE"); +// $data["APPLET"] = array("CODEBASE", "CODE", "NAME", "ALIGN", "ALT", "HEIGHT", "WIDTH", "HSPACE", "VSPACE", "DOWNLOAD", "HEIGHT", "NAME", "TITLE", "onLoad", "STYLE"); +// $data["AREA"] = array("SHAPE", "ALT", "CO-ORDS", "HREF", "onLoad", "STYLE"); +$data["B"] = array("onLoad", "STYLE"); +// $data["BANNER"] = array("onLoad", "STYLE"); +// $data["BASE"] = array("HREF", "TARGET", "onLoad", "STYLE"); +// $data["BASEFONT"] = array("SIZE", "onLoad", "STYLE"); +// $data["BGSOUND"] = array("SRC", "LOOP", "onLoad", "STYLE"); +// $data["BQ"] = array("CLEAR", "NOWRAP", "onLoad", "STYLE"); +// $data["BODY"] = array("BACKGROUND", "BGCOLOR", "TEXT", "LINK", "ALINK", "VLINK", "LEFTMARGIN", "TOPMARGIN", "BGPROPERTIES", "onLoad", "STYLE"); +$data["CAPTION"] = array("ALIGN", "VALIGN", "onLoad", "STYLE"); +$data["CENTER"] = array("onLoad", "STYLE"); +// $data["COL"] = array("ALIGN", "SPAN", "onLoad", "STYLE"); +// $data["COLGROUP"] = array("ALIGN", "VALIGN", "HALIGN", "WIDTH", "SPAN", "onLoad", "STYLE"); +$data["DIV"] = array("ALIGN", "CLASS", "LANG", "onLoad", "STYLE"); +// $data["EMBED"] = array("SRC", "HEIGHT", "WIDTH", "UNITS", "NAME", "PALETTE", "onLoad", "STYLE"); +// $data["FIG"] = array("SRC", "ALIGN", "HEIGHT", "WIDTH", "UNITS", "IMAGEMAP", "onLoad", "STYLE"); +// $data["FN"] = array("ID", "onLoad", "STYLE"); +$data["FONT"] = array("SIZE", "COLOR", "FACE", "onLoad", "STYLE"); +// $data["FORM"] = array("ACTION", "METHOD", "ENCTYPE", "TARGET", "SCRIPT", "onLoad", "STYLE"); +// $data["FRAME"] = array("SRC", "NAME", "MARGINWIDTH", "MARGINHEIGHT", "SCROLLING", "FRAMESPACING", "onLoad", "STYLE"); +// $data["FRAMESET"] = array("ROWS", "COLS", "onLoad", "STYLE"); +$data["H1"] = array("SRC", "DINGBAT", "onLoad", "STYLE"); +// $data["HEAD"] = array("onLoad", "STYLE"); +$data["HR"] = array("SRC", "SIZE", "WIDTH", "ALIGN", "COLOR", "onLoad", "STYLE"); +// 
$data["HTML"] = array("onLoad", "STYLE"); +// $data["IFRAME"] = array("ALIGN", "FRAMEBORDER", "HEIGHT", "MARGINHEIGHT", "MARGINWIDTH", "NAME", "SCROLLING", "SRC", "ADDRESS", "WIDTH", "onLoad", "STYLE"); +// $data["IMG"] = array("ALIGN", "ALT", "SRC", "BORDER", "DYNSRC", "HEIGHT", "HSPACE", "ISMAP", "LOOP", "LOWSRC", "START", "UNITS", "USEMAP", "WIDTH", "VSPACE", "onLoad", "STYLE"); +// $data["INPUT"] = array("TYPE", "NAME", "VALUE", "onLoad", "STYLE"); +// $data["ISINDEX"] = array("HREF", "PROMPT", "onLoad", "STYLE"); +$data["LI"] = array("SRC", "DINGBAT", "SKIP", "TYPE", "VALUE", "onLoad", "STYLE"); +// $data["LINK"] = array("REL", "REV", "HREF", "TITLE", "onLoad", "STYLE"); +// $data["MAP"] = array("NAME", "onLoad", "STYLE"); +// $data["MARQUEE"] = array("ALIGN", "BEHAVIOR", "BGCOLOR", "DIRECTION", "HEIGHT", "HSPACE", "LOOP", "SCROLLAMOUNT", "SCROLLDELAY", "WIDTH", "VSPACE", "onLoad", "STYLE"); +// $data["MENU"] = array("onLoad", "STYLE"); +// $data["META"] = array("HTTP-EQUIV", "CONTENT", "NAME", "onLoad", "STYLE"); +// $data["MULTICOL"] = array("COLS", "GUTTER", "WIDTH", "onLoad", "STYLE"); +// $data["NOFRAMES"] = array("onLoad", "STYLE"); +// $data["NOTE"] = array("CLASS", "SRC", "onLoad", "STYLE"); +// $data["OVERLAY"] = array("SRC", "X", "Y", "HEIGHT", "WIDTH", "UNITS", "IMAGEMAP", "onLoad", "STYLE"); +// $data["PARAM"] = array("NAME", "VALUE", "onLoad", "STYLE"); +// $data["RANGE"] = array("FROM", "UNTIL", "onLoad", "STYLE"); +// $data["SCRIPT"] = array("LANGUAGE", "onLoad", "STYLE"); +// $data["SELECT"] = array("NAME", "SIZE", "MULTIPLE", "WIDTH", "HEIGHT", "UNITS", "onLoad", "STYLE"); +// $data["OPTION"] = array("VALUE", "SHAPE", "onLoad", "STYLE"); +// $data["SPACER"] = array("TYPE", "SIZE", "WIDTH", "HEIGHT", "ALIGN", "onLoad", "STYLE"); +// $data["SPOT"] = array("ID", "onLoad", "STYLE"); +// $data["TAB"] = array("INDENT", "TO", "ALIGN", "DP", "onLoad", "STYLE"); +$data["TABLE"] = array("ALIGN", "WIDTH", "BORDER", "CELLPADDING", "CELLSPACING", 
"BGCOLOR", "VALIGN", "COLSPEC", "UNITS", "DP", "onLoad", "STYLE"); +// $data["TBODY"] = array("CLASS", "ID", "onLoad", "STYLE"); +$data["TD"] = array("COLSPAN", "ROWSPAN", "ALIGN", "VALIGN", "BGCOLOR", "onLoad", "STYLE"); +// $data["TEXTAREA"] = array("NAME", "COLS", "ROWS", "onLoad", "STYLE"); +// $data["TEXTFLOW"] = array("CLASS", "ID", "onLoad", "STYLE"); +// $data["TFOOT"] = array("COLSPAN", "ROWSPAN", "ALIGN", "VALIGN", "BGCOLOR", "onLoad", "STYLE"); +$data["TH"] = array("ALIGN", "CLASS", "ID", "onLoad", "STYLE"); +// $data["TITLE"] = array("onLoad", "STYLE"); +$data["TR"] = array("ALIGN", "VALIGN", "BGCOLOR", "CLASS", "onLoad", "STYLE"); +$data["UL"] = array("SRC", "DINGBAT", "SKIP", "TYPE", "VALUE", "onLoad", "STYLE"); + +// Now add in a few that were not in the original, but which MediaWiki understands, even with +// extraneous attributes: +$data["gallery"] = array("CLASS", "ID", "onLoad", "STYLE"); +$data["pre"] = array("CLASS", "ID", "onLoad", "STYLE"); +$data["nowiki"] = array("CLASS", "ID", "onLoad", "STYLE"); +$data["blockquote"] = array("CLASS", "ID", "onLoad", "STYLE"); +$data["span"] = array("CLASS", "ID", "onLoad", "STYLE"); +$data["code"] = array("CLASS", "ID", "onLoad", "STYLE"); +$data["tt"] = array("CLASS", "ID", "onLoad", "STYLE"); +$data["small"] = array("CLASS", "ID", "onLoad", "STYLE"); +$data["big"] = array("CLASS", "ID", "onLoad", "STYLE"); +$data["s"] = array("CLASS", "ID", "onLoad", "STYLE"); +$data["u"] = array("CLASS", "ID", "onLoad", "STYLE"); +$data["del"] = array("CLASS", "ID", "onLoad", "STYLE"); +$data["ins"] = array("CLASS", "ID", "onLoad", "STYLE"); +$data["sub"] = array("CLASS", "ID", "onLoad", "STYLE"); +$data["ol"] = array("CLASS", "ID", "onLoad", "STYLE"); + + +// The types of the HTML that we will be testing were defined above +$types = array_keys($data); + +// Some attribute values. 
$other = array("&","=",":","?","\"","\n","%n%n%n%n%n%n%n%n%n%n%n%n","\\");
$ints  = array("0","-1","127","7897","89000","808080","90928345","74326794236234","0xfffffff","ffff");

///////////////////////////////// WIKI-SYNTAX ///////////////////////////
/* Note: Defines various wiki-related bits of syntax, that can potentially cause
         MediaWiki to do something other than just print that literal text */
$ext = array(
"[[", "]]", "\n{|", "|}", "{{", "}}", "|", "[[image:", "[", "]",
"=", "==", "===", "====", "=====", "======", "\n*", "*", "\n:", ":",
"{{{", "}}}",
"\n", "\n#", "#", "\n;", ";", "\n ",
"----", "\n----",
"|]]", "~~~", "#REDIRECT [[", "'''", "''",
"ISBN 2", "\n|-", "| ", "\n| ",
"<!--", "-->",
"\"", "'",
">",
"http://","https://","url://","ftp://","file://","irc://","javascript:",
"!",
"\n! ",
"!!",
"||",
".gif",
".png",
".jpg",
".jpeg",
"<!--()()",
'%08X',
'/',
":x{|",
"\n|-",
"\n|+",
"<noinclude>",
"</noinclude>",
"\n-----",
"UNIQ25f46b0524f13e67NOPARSE",
" \302\273",
" :",
" !",
" ;",
"\302\253",
"RFC 000",
"PMID 000",
"?=",
"(",
")",    // was `")".`, which concatenated this entry with the next one into ")]]]"
"]]]",
"../",
"{{{{",
"}}}}",
"{{subst:",
'__NOTOC__',
'__FORCETOC__',
'__NOEDITSECTION__',
'__START__',
'{{PAGENAME}}',
'{{PAGENAMEE}}',
'{{NAMESPACE}}',
'{{MSG:',
'{{MSGNW:',
'__END__',
'{{INT:',
'{{SITENAME}}',
'{{NS:',
'{{LOCALURL:',
'{{LOCALURLE:',
'{{SCRIPTPATH}}',
'{{GRAMMAR:',
'__NOTITLECONVERT__',
'__NOCONTENTCONVERT__',
"<!--MWTEMPLATESECTION=",
"<!--LINK 987-->",
"<!--IWLINK 987-->",
"Image:",
"[[category:",
"{{REVISIONID}}",
"{{SUBPAGENAME}}",
"{{SUBPAGENAMEE}}",
"{{ns:0}}",
"[[:Image",
"[[Special:",
"{{fullurl:}}",
'__TOC__',
"<includeonly>",
"</includeonly>",
"<math>",
"</math>"
);


///////////////////// A CLASS THAT GENERATES RANDOM STRINGS OF DATA //////////////////////

/**
 * Generator for pages of random, malformed tag soup. Draws tag names and
 * attribute names from the globals $types / $data and attribute values from
 * the globals $ext, $ints and $other.
 */
class htmler {
	/** Number of attributes emitted per generated tag. */
	var $maxparams = 4;
	/** Number of tags emitted per generated page. */
	var $maxtypes = 40;

	/**
	 * @param int $finish upper bound (inclusive)
	 * @param int $start  lower bound (inclusive)
	 * @return int random integer from mt_rand($start, $finish)
	 */
	function randnum($finish,$start=0) {
		return mt_rand($start,$finish);
	}

	/**
	 * Build a string out of 40 random pieces; each piece is either an entry
	 * of $ext or a single random character repeated 0 to 8 times.
	 * @return string
	 */
	function randstring() {
		global $ext;
		$thestring = "";

		for ($i=0; $i<40; $i++) {
			$what = $this->randnum(1);

			if ($what == 0) { // include some random wiki syntax
				$which = $this->randnum(count($ext) - 1);
				$thestring .= $ext[$which];
			}
			else { // include some random text
				$char = chr(INCLUDE_BINARY ? $this->randnum(255) : $this->randnum(126,32));
				if ($char == "<") $char = ""; // we don't want the '<' character, it stuffs us up.
				$length = $this->randnum(8);
				$thestring .= str_repeat ($char, $length);
			}
		}
		return $thestring;
	}

	/**
	 * Pick one attribute value: a random string, an entry of $ints, or an
	 * entry of $other, each chosen with equal probability.
	 * @return string
	 */
	function makestring() {
		global $ints, $other;
		$what = $this->randnum(2);
		if ($what == 0) {
			return $this->randstring();
		}
		elseif ($what == 1) {
			return $ints[$this->randnum(count($ints) - 1)];
		}
		else {
			return $other[$this->randnum(count($other) - 1)];
		}
	}

	/**
	 * Generate one opening tag of a random type with $maxparams random
	 * attribute=value pairs (values are deliberately left unquoted).
	 * @return string the tag followed by a newline
	 */
	function loop() {
		global $types, $data;
		$string = "";
		$i = $this->randnum(count($types) - 1);
		$t = $types[$i];
		$arr = $data[$t];
		$string .= "<" . $types[$i] . " ";
		for ($z=0; $z<$this->maxparams; $z++) {
			$badparam = $arr[$this->randnum(count($arr) - 1)];
			$badstring = $this->makestring();
			$string .= $badparam . "=" . $badstring . " ";
		}
		$string .= ">\n";
		return $string;
	}

	/**
	 * Generate a whole test page consisting of $maxtypes random tags.
	 * @return string
	 */
	function main() {
		$page = "";
		for ($k=0; $k<$this->maxtypes; $k++) {
			$page .= $this->loop();
		}
		return $page;
	}
}


//////////////////// SAVING OUTPUT /////////////////////////


/**
** @desc: Utility function for saving a file below DIRECTORY.
** @param string $string contents to write
** @param string $name   file name relative to DIRECTORY
** @return bool false when the file could not be opened, true otherwise
*/
function saveFile($string, $name) {
	$path = DIRECTORY . "/" . $name;
	$fp = fopen ( $path, "w");
	if ($fp === false) {
		// Don't hand a failed handle on to fwrite()/fclose().
		trigger_error("Could not open " . $path . " for writing");
		return false;
	}
	fwrite($fp, $string);
	fclose ($fp);
	return true;
}


//////////////////// MEDIAWIKI PREVIEW /////////////////////////

/*
** @desc: Asks MediaWiki for a preview of a string. Returns the HTML.
**        POSTs the text as a new-section preview to WIKI_URL; terminates
**        the script when curl is unavailable or the request fails.
*/
function wikiPreview($text) {

	$params = array (
				"action"      => "submit",
				"wpMinoredit" => "1",
				"wpPreview"   => "Show preview",
				"wpSection"   => "new",
				"wpEdittime"  => "",
				"wpSummary"   => "This is a test",
				"wpTextbox1"  => $text
			);

	if( function_exists('curl_init') ) {
		$ch = curl_init();
	} else {
		die("Could not found 'curl_init' function. Is curl extension enabled ?\n");
	}

	curl_setopt($ch, CURLOPT_POST, 1);              // save form using a POST
	curl_setopt($ch, CURLOPT_POSTFIELDS, $params);  // load the POST variables
	curl_setopt($ch, CURLOPT_URL, WIKI_URL);        // set url to post to
	curl_setopt($ch, CURLOPT_RETURNTRANSFER,1);     // return into a variable

	$result=curl_exec ($ch);

	// if we encountered an error, then log it, and exit.
	if (curl_error($ch)) {
		trigger_error("Curl error #: " . curl_errno($ch) . " - " . curl_error ($ch) );
		print "Curl error #: " . curl_errno($ch) . " - " . curl_error ($ch) . " - exiting.\n";
		exit();
	}

	curl_close ($ch);

	return $result;
}


//////////////////// HTML VALIDATION /////////////////////////

/*
** @desc: Asks the validator whether this is valid HTML, or not.
*/
function validateHTML($text) {

	$params = array ("fragment" => $text);

	$ch = curl_init();

	curl_setopt($ch, CURLOPT_POST, 1);              // save form using a POST
	curl_setopt($ch, CURLOPT_POSTFIELDS, $params);  // load the POST variables
	curl_setopt($ch, CURLOPT_URL, VALIDATOR_URL);   // set url to post to
	curl_setopt($ch, CURLOPT_RETURNTRANSFER,1);     // return into a variable

	$result=curl_exec ($ch);

	// if we encountered an error, then log it, and exit.
	if (curl_error($ch)) {
		trigger_error("Curl error #: " . curl_errno($ch) . " - " . curl_error ($ch) );
		print "Curl error #: " . curl_errno($ch) . " - " . curl_error ($ch) . " - exiting.\n";
		exit();
	}

	curl_close ($ch);

	// The page counts as valid unless the validator's response contains
	// the text "Failed validation".
	$valid = (strpos($result, "Failed validation") === false);

	return array($valid, $result);
}



/**
** @desc: checks the string to see if tags are balanced. A line is reported
**        when it contains more "<" than ">" characters, unless it is one of
**        the lines MediaWiki itself is known to emit unbalanced.
** @param string $string   HTML to check
** @param string $filename name (relative to DIRECTORY) used in the report
** @return bool true when no unbalanced line was found
*/
function checkOpenCloseTags($string, $filename) {
	$valid = true;

	$lines = explode("\n", $string);

	$num_lines = count($lines);
	// print "Num lines: " . $num_lines . "\n";

	// Lines (exact text) that are unbalanced in MediaWiki's own output.
	$skip_exact = array(
		"\t\t<style type=\"text/css\">/*<![CDATA[*/",
		"<textarea tabindex='1' accesskey=\",\" name=\"wpTextbox1\" id=\"wpTextbox1\" rows='25'",
		"/*<![CDATA[*/",
		"/*]]>*/",
		"<textarea tabindex='1' accesskey=\",\" name=\"wpTextbox1\" rows='25'",
	);
	// Line beginnings whose line number and remaining content change.
	$skip_prefixes = array(
		"<form id=\"editform\" name=\"editform\" method=\"post\" action=\"",
		"enctype=\"multipart/form-data\"><input type=\"hidden\" name=\"wikidb_session\" value=\"",
		"cols='80'>",
	);
	// Unbalanced lines identified by their distance from the end of the page.
	$skip_from_end = array(246, 65, 62, 52, 50, 29, 28, 27, 23);

	foreach ($lines as $line_num => $line) {

		// skip mediawiki's own unbalanced lines.
		if ($line_num == 15) continue;
		if (in_array($line, $skip_exact, true)) continue;

		// Plain prefix comparison replaces the ereg("^...") calls; ereg()
		// no longer exists in PHP 7 and none of the patterns used regex syntax.
		$is_known_prefix = false;
		foreach ($skip_prefixes as $prefix) {
			if (strpos($line, $prefix) === 0) {
				$is_known_prefix = true;
				break;
			}
		}
		if ($is_known_prefix) continue;

		if (in_array($num_lines - $line_num, $skip_from_end, true)) continue;

		if (substr_count($line, "<") > substr_count($line, ">")) {
			print "\nUnclosed tag in " . DIRECTORY . "/" . $filename . " on line: " . ($line_num + 1) . " \n$line\n";
			$valid = false;
		}
	}
	return $valid;
}


/**
** @desc: Get tidy to check for no HTML errors in the output file (e.g. unescaped strings).
** @param string $name file name relative to DIRECTORY
** @return bool true when tidy printed nothing, false otherwise
*/
function tidyCheckFile($name) {
	$file = DIRECTORY . "/" . $name;
	// Quote the path: file names are taken from the directory listing on
	// retests and must not be interpreted by the shell.
	$x = (string)shell_exec("tidy -errors -quiet --show-warnings false " . escapeshellarg($file) . " 2>&1");
	if (trim($x) != "") {
		print "Tidy errors found in $file:\n$x";
		return false;
	} else {
		return true;
	}
}


////////////////////// TESTING FUNCTION ////////////////////////
/**
** @desc: takes a wiki markup string, and tests it for security or validation problems.
**        The markup and the resulting preview are written below DIRECTORY;
**        both files are removed again when every check passes.
** @param string $raw_markup wiki text to submit
** @param mixed  $testname   base name for the files written for this test
*/
function testWikiMarkup($raw_markup, $testname) {

	// don't overwrite a previous test of the same name.
	while (file_exists(DIRECTORY . "/" . $testname . ".raw_markup.txt")) {
		$testname .= "-" . mt_rand(0,9);
	}

	// upload to MediaWiki install.
	$wiki_preview = wikiPreview($raw_markup);

	// save output files
	saveFile($raw_markup,  $testname . ".raw_markup.txt");
	saveFile($wiki_preview,  $testname . ".wiki_preview.html");

	// validate result; later checks are skipped once one has failed.
	$valid = true;
	$validator_output = "";
	if (VALIDATE_ON_WEB) list ($valid, $validator_output) = validateHTML($wiki_preview);
	$valid = $valid && checkOpenCloseTags ($wiki_preview, $testname . ".wiki_preview.html");
	$valid = $valid && tidyCheckFile( $testname . ".wiki_preview.html" );


	if( $valid ) {
		// Remove valid tests:
		unlink( DIRECTORY . "/" . $testname . ".raw_markup.txt" );
		unlink( DIRECTORY . "/" . $testname . ".wiki_preview.html");
	} elseif( VALIDATE_ON_WEB ) {
		saveFile($validator_output,  $testname . ".validator_output.html");
	}
}


////////////////////// MAIN LOOP ////////////////////////

// Make directory if doesn't exist
if (!is_dir(DIRECTORY)) {
	mkdir (DIRECTORY, 0700 );
}
// otherwise, retest the things that we have found in previous runs
else {
	print "Retesting previously found problems.\n";

	// create a handler for the directory
	$handler = opendir(DIRECTORY);

	// keep going until all files in directory have been read; compare
	// against false explicitly so a file named "0" does not end the loop.
	$markup_suffix = ".raw_markup.txt";
	while (false !== ($file = readdir($handler))) {

		// if file is not raw markup, or is a retest, then skip it.
		if (substr($file, -strlen($markup_suffix)) !== $markup_suffix) continue;
		if (strpos($file, "retest-") === 0) continue;

		print "Retesting " . DIRECTORY . "/" . $file . "\n";

		// get file contents
		$markup = file_get_contents(DIRECTORY . "/" . $file);

		// run retest
		testWikiMarkup($markup, "retest-" . $file);
	}

	// tidy up: close the handler
	closedir($handler);

	print "Done retesting.\n";
}

// seed the random number generator
mt_srand(crc32(microtime()));

// main loop.
$h = new htmler();

print "Beginning main loop. Results are stored in the ".DIRECTORY." directory.\n";
print "Press CTRL+C to stop testing.\n";
// One frame of the progress spinner per iteration.
$spinner = array("/", "-", "\\", "|");
for ($count=0; true /*$count<10000 */ ; $count++) { // while (true)
	print "\r" . $spinner[$count % 4];
	print " $count";

	// generate and save text to test.
+ $raw_markup = $h->main(); + + // test this wiki markup + testWikiMarkup($raw_markup, $count); +} +?> diff --git a/maintenance/wikipedia-interwiki.sql b/maintenance/wikipedia-interwiki.sql new file mode 100644 index 00000000..c6e4883f --- /dev/null +++ b/maintenance/wikipedia-interwiki.sql @@ -0,0 +1,220 @@ +-- For convenience, here are the *in-project* interwiki prefixes +-- for Wikipedia. + +REPLACE INTO /*$wgDBprefix*/interwiki (iw_prefix,iw_url,iw_local) VALUES +('q','http://en.wikiquote.org/wiki/$1',1), +('b','http://en.wikibooks.org/wiki/$1',1), +('n','http://en.wikinews.org/wiki/$1',1), +('aa','http://aa.wikipedia.org/wiki/$1',1), +('ab','http://ab.wikipedia.org/wiki/$1',1), +('af','http://af.wikipedia.org/wiki/$1',1), +('ak','http://ak.wikipedia.org/wiki/$1',1), +('als','http://als.wikipedia.org/wiki/$1',1), +('am','http://am.wikipedia.org/wiki/$1',1), +('an','http://an.wikipedia.org/wiki/$1',1), +('ang','http://ang.wikipedia.org/wiki/$1',1), +('ar','http://ar.wikipedia.org/wiki/$1',1), +('arc','http://arc.wikipedia.org/wiki/$1',1), +('as','http://as.wikipedia.org/wiki/$1',1), +('ast','http://ast.wikipedia.org/wiki/$1',1), +('av','http://av.wikipedia.org/wiki/$1',1), +('ay','http://ay.wikipedia.org/wiki/$1',1), +('az','http://az.wikipedia.org/wiki/$1',1), +('ba','http://ba.wikipedia.org/wiki/$1',1), +('be','http://be.wikipedia.org/wiki/$1',1), +('bg','http://bg.wikipedia.org/wiki/$1',1), +('bh','http://bh.wikipedia.org/wiki/$1',1), +('bi','http://bi.wikipedia.org/wiki/$1',1), +('bm','http://bm.wikipedia.org/wiki/$1',1), +('bn','http://bn.wikipedia.org/wiki/$1',1), +('bo','http://bo.wikipedia.org/wiki/$1',1), +('br','http://br.wikipedia.org/wiki/$1',1), +('bs','http://bs.wikipedia.org/wiki/$1',1), +('ca','http://ca.wikipedia.org/wiki/$1',1), +('ce','http://ce.wikipedia.org/wiki/$1',1), +('ch','http://ch.wikipedia.org/wiki/$1',1), +('cho','http://cho.wikipedia.org/wiki/$1',1), +('chr','http://chr.wikipedia.org/wiki/$1',1), 
+('chy','http://chy.wikipedia.org/wiki/$1',1), +('co','http://co.wikipedia.org/wiki/$1',1), +('cr','http://cr.wikipedia.org/wiki/$1',1), +('cs','http://cs.wikipedia.org/wiki/$1',1), +('csb','http://csb.wikipedia.org/wiki/$1',1), +('cv','http://cv.wikipedia.org/wiki/$1',1), +('cy','http://cy.wikipedia.org/wiki/$1',1), +('da','http://da.wikipedia.org/wiki/$1',1), +('de','http://de.wikipedia.org/wiki/$1',1), +('dv','http://dv.wikipedia.org/wiki/$1',1), +('dz','http://dz.wikipedia.org/wiki/$1',1), +('ee','http://ee.wikipedia.org/wiki/$1',1), +('el','http://el.wikipedia.org/wiki/$1',1), +('en','http://en.wikipedia.org/wiki/$1',1), +('eo','http://eo.wikipedia.org/wiki/$1',1), +('es','http://es.wikipedia.org/wiki/$1',1), +('et','http://et.wikipedia.org/wiki/$1',1), +('eu','http://eu.wikipedia.org/wiki/$1',1), +('fa','http://fa.wikipedia.org/wiki/$1',1), +('ff','http://ff.wikipedia.org/wiki/$1',1), +('fi','http://fi.wikipedia.org/wiki/$1',1), +('fj','http://fj.wikipedia.org/wiki/$1',1), +('fo','http://fo.wikipedia.org/wiki/$1',1), +('fr','http://fr.wikipedia.org/wiki/$1',1), +('fy','http://fy.wikipedia.org/wiki/$1',1), +('fur','http://fur.wikipedia.org/wiki/$1',1), +('ga','http://ga.wikipedia.org/wiki/$1',1), +('gd','http://gd.wikipedia.org/wiki/$1',1), +('gl','http://gl.wikipedia.org/wiki/$1',1), +('gn','http://gn.wikipedia.org/wiki/$1',1), +('got','http://got.wikipedia.org/wiki/$1',1), +('gu','http://gu.wikipedia.org/wiki/$1',1), +('gv','http://gv.wikipedia.org/wiki/$1',1), +('ha','http://ha.wikipedia.org/wiki/$1',1), +('haw','http://haw.wikipedia.org/wiki/$1',1), +('he','http://he.wikipedia.org/wiki/$1',1), +('hi','http://hi.wikipedia.org/wiki/$1',1), +('ho','http://ho.wikipedia.org/wiki/$1',1), +('hr','http://hr.wikipedia.org/wiki/$1',1), +('ht','http://ht.wikipedia.org/wiki/$1',1), +('hu','http://hu.wikipedia.org/wiki/$1',1), +('hy','http://hy.wikipedia.org/wiki/$1',1), +('hz','http://hz.wikipedia.org/wiki/$1',1), +('ia','http://ia.wikipedia.org/wiki/$1',1), 
+('id','http://id.wikipedia.org/wiki/$1',1), +('ie','http://ie.wikipedia.org/wiki/$1',1), +('ig','http://ig.wikipedia.org/wiki/$1',1), +('ii','http://ii.wikipedia.org/wiki/$1',1), +('ik','http://ik.wikipedia.org/wiki/$1',1), +('io','http://io.wikipedia.org/wiki/$1',1), +('is','http://is.wikipedia.org/wiki/$1',1), +('it','http://it.wikipedia.org/wiki/$1',1), +('iu','http://iu.wikipedia.org/wiki/$1',1), +('ja','http://ja.wikipedia.org/wiki/$1',1), +('jbo','http://jbo.wikipedia.org/wiki/$1',1), +('jv','http://jv.wikipedia.org/wiki/$1',1), +('ka','http://ka.wikipedia.org/wiki/$1',1), +('kg','http://kg.wikipedia.org/wiki/$1',1), +('ki','http://ki.wikipedia.org/wiki/$1',1), +('kj','http://kj.wikipedia.org/wiki/$1',1), +('kk','http://kk.wikipedia.org/wiki/$1',1), +('kl','http://kl.wikipedia.org/wiki/$1',1), +('km','http://km.wikipedia.org/wiki/$1',1), +('kn','http://kn.wikipedia.org/wiki/$1',1), +('ko','http://ko.wikipedia.org/wiki/$1',1), +('kr','http://kr.wikipedia.org/wiki/$1',1), +('ks','http://ks.wikipedia.org/wiki/$1',1), +('ku','http://ku.wikipedia.org/wiki/$1',1), +('kv','http://kv.wikipedia.org/wiki/$1',1), +('kw','http://kw.wikipedia.org/wiki/$1',1), +('ky','http://ky.wikipedia.org/wiki/$1',1), +('la','http://la.wikipedia.org/wiki/$1',1), +('lb','http://lb.wikipedia.org/wiki/$1',1), +('lg','http://lg.wikipedia.org/wiki/$1',1), +('li','http://li.wikipedia.org/wiki/$1',1), +('ln','http://ln.wikipedia.org/wiki/$1',1), +('lo','http://lo.wikipedia.org/wiki/$1',1), +('lt','http://lt.wikipedia.org/wiki/$1',1), +('lv','http://lv.wikipedia.org/wiki/$1',1), +('mg','http://mg.wikipedia.org/wiki/$1',1), +('mh','http://mh.wikipedia.org/wiki/$1',1), +('mi','http://mi.wikipedia.org/wiki/$1',1), +('mk','http://mk.wikipedia.org/wiki/$1',1), +('ml','http://ml.wikipedia.org/wiki/$1',1), +('mn','http://mn.wikipedia.org/wiki/$1',1), +('mo','http://mo.wikipedia.org/wiki/$1',1), +('mr','http://mr.wikipedia.org/wiki/$1',1), +('ms','http://ms.wikipedia.org/wiki/$1',1), 
+('mt','http://mt.wikipedia.org/wiki/$1',1), +('mus','http://mus.wikipedia.org/wiki/$1',1), +('my','http://my.wikipedia.org/wiki/$1',1), +('na','http://na.wikipedia.org/wiki/$1',1), +('nah','http://nah.wikipedia.org/wiki/$1',1), +('nb','http://nb.wikipedia.org/wiki/$1',1), +('nds','http://nds.wikipedia.org/wiki/$1',1), +('ne','http://ne.wikipedia.org/wiki/$1',1), +('ng','http://ng.wikipedia.org/wiki/$1',1), +('nl','http://nl.wikipedia.org/wiki/$1',1), +('nn','http://nn.wikipedia.org/wiki/$1',1), +('no','http://no.wikipedia.org/wiki/$1',1), +('nv','http://nv.wikipedia.org/wiki/$1',1), +('ny','http://ny.wikipedia.org/wiki/$1',1), +('oc','http://oc.wikipedia.org/wiki/$1',1), +('om','http://om.wikipedia.org/wiki/$1',1), +('or','http://or.wikipedia.org/wiki/$1',1), +('pa','http://pa.wikipedia.org/wiki/$1',1), +('pi','http://pi.wikipedia.org/wiki/$1',1), +('pl','http://pl.wikipedia.org/wiki/$1',1), +('ps','http://ps.wikipedia.org/wiki/$1',1), +('pt','http://pt.wikipedia.org/wiki/$1',1), +('qu','http://qu.wikipedia.org/wiki/$1',1), +('rm','http://rm.wikipedia.org/wiki/$1',1), +('rn','http://rn.wikipedia.org/wiki/$1',1), +('ro','http://ro.wikipedia.org/wiki/$1',1), +('roa-rup','http://roa-rup.wikipedia.org/wiki/$1',1), +('ru','http://ru.wikipedia.org/wiki/$1',1), +('rw','http://rw.wikipedia.org/wiki/$1',1), +('sa','http://sa.wikipedia.org/wiki/$1',1), +('sc','http://sc.wikipedia.org/wiki/$1',1), +('scn','http://scn.wikipedia.org/wiki/$1',1), +('sd','http://sd.wikipedia.org/wiki/$1',1), +('se','http://se.wikipedia.org/wiki/$1',1), +('sg','http://sg.wikipedia.org/wiki/$1',1), +('sh','http://sh.wikipedia.org/wiki/$1',1), +('si','http://si.wikipedia.org/wiki/$1',1), +('simple','http://simple.wikipedia.org/wiki/$1',1), +('sk','http://sk.wikipedia.org/wiki/$1',1), +('sl','http://sl.wikipedia.org/wiki/$1',1), +('sm','http://sm.wikipedia.org/wiki/$1',1), +('sn','http://sn.wikipedia.org/wiki/$1',1), +('so','http://so.wikipedia.org/wiki/$1',1), 
+('sq','http://sq.wikipedia.org/wiki/$1',1), +('sr','http://sr.wikipedia.org/wiki/$1',1), +('ss','http://ss.wikipedia.org/wiki/$1',1), +('st','http://st.wikipedia.org/wiki/$1',1), +('su','http://su.wikipedia.org/wiki/$1',1), +('sv','http://sv.wikipedia.org/wiki/$1',1), +('sw','http://sw.wikipedia.org/wiki/$1',1), +('ta','http://ta.wikipedia.org/wiki/$1',1), +('te','http://te.wikipedia.org/wiki/$1',1), +('tg','http://tg.wikipedia.org/wiki/$1',1), +('th','http://th.wikipedia.org/wiki/$1',1), +('ti','http://ti.wikipedia.org/wiki/$1',1), +('tk','http://tk.wikipedia.org/wiki/$1',1), +('tl','http://tl.wikipedia.org/wiki/$1',1), +('tlh','http://tlh.wikipedia.org/wiki/$1',1), +('tn','http://tn.wikipedia.org/wiki/$1',1), +('to','http://to.wikipedia.org/wiki/$1',1), +('tokipona','http://tokipona.wikipedia.org/wiki/$1',1), +('tpi','http://tpi.wikipedia.org/wiki/$1',1), +('tr','http://tr.wikipedia.org/wiki/$1',1), +('ts','http://ts.wikipedia.org/wiki/$1',1), +('tt','http://tt.wikipedia.org/wiki/$1',1), +('tum','http://tum.wikipedia.org/wiki/$1',1), +('tw','http://tw.wikipedia.org/wiki/$1',1), +('ty','http://ty.wikipedia.org/wiki/$1',1), +('ug','http://ug.wikipedia.org/wiki/$1',1), +('uk','http://uk.wikipedia.org/wiki/$1',1), +('ur','http://ur.wikipedia.org/wiki/$1',1), +('uz','http://uz.wikipedia.org/wiki/$1',1), +('ve','http://ve.wikipedia.org/wiki/$1',1), +('vi','http://vi.wikipedia.org/wiki/$1',1), +('vo','http://vo.wikipedia.org/wiki/$1',1), +('wa','http://wa.wikipedia.org/wiki/$1',1), +('wo','http://wo.wikipedia.org/wiki/$1',1), +('xh','http://xh.wikipedia.org/wiki/$1',1), +('yi','http://yi.wikipedia.org/wiki/$1',1), +('yo','http://yo.wikipedia.org/wiki/$1',1), +('za','http://za.wikipedia.org/wiki/$1',1), +('zh','http://zh.wikipedia.org/wiki/$1',1), +('zh-min-nan','http://zh-min-nan.wikipedia.org/wiki/$1',1), +('zu','http://zu.wikipedia.org/wiki/$1',1), +('zh-cn','http://zh.wikipedia.org/wiki/$1',1), +('zh-tw','http://zh.wikipedia.org/wiki/$1',1), 
+('minnan','http://zh-min-nan.wikipedia.org/wiki/$1',1), +('zh-cfr','http://zh-min-nan.wikipedia.org/wiki/$1',1), +('dk','http://da.wikipedia.org/wiki/$1',1), +('w','http://en.wikipedia.org/wiki/$1',1), +('m','http://meta.wikimedia.org/wiki/$1',1), +('meta','http://meta.wikimedia.org/wiki/$1',1), +('sep11','http://sep11.wikipedia.org/wiki/$1',1), +('os','http://os.wikipedia.org/wiki/$1',1); + diff --git a/maintenance/wiktionary-interwiki.sql b/maintenance/wiktionary-interwiki.sql new file mode 100644 index 00000000..787962d5 --- /dev/null +++ b/maintenance/wiktionary-interwiki.sql @@ -0,0 +1,160 @@ +-- For convenience, here are the *in-project* interwiki prefixes +-- for Wiktionary. + +REPLACE INTO /*$wgDBprefix*/interwiki (iw_prefix,iw_url,iw_local) VALUES +('w','http://www.wikipedia.org/wiki/$1',1), +('m','http://meta.wikipedia.org/wiki/$1',1), +('meta','http://meta.wikipedia.org/wiki/$1',1), +('sep11','http://sep11.wikipedia.org/wiki/$1',1), +('simple','http://simple.wiktionary.org/wiki/$1',1), +('aa','http://aa.wiktionary.org/wiki/$1',1), +('ab','http://ab.wiktionary.org/wiki/$1',1), +('af','http://af.wiktionary.org/wiki/$1',1), +('als','http://als.wiktionary.org/wiki/$1',1), +('am','http://am.wiktionary.org/wiki/$1',1), +('ar','http://ar.wiktionary.org/wiki/$1',1), +('as','http://as.wiktionary.org/wiki/$1',1), +('ay','http://ay.wiktionary.org/wiki/$1',1), +('az','http://az.wiktionary.org/wiki/$1',1), +('ba','http://ba.wiktionary.org/wiki/$1',1), +('be','http://be.wiktionary.org/wiki/$1',1), +('bg','http://bg.wiktionary.org/wiki/$1',1), +('bh','http://bh.wiktionary.org/wiki/$1',1), +('bi','http://bi.wiktionary.org/wiki/$1',1), +('bn','http://bn.wiktionary.org/wiki/$1',1), +('bo','http://bo.wiktionary.org/wiki/$1',1), +('bs','http://bs.wiktionary.org/wiki/$1',1), +('ca','http://ca.wiktionary.org/wiki/$1',1), +('chr','http://chr.wiktionary.org/wiki/$1',1), +('co','http://co.wiktionary.org/wiki/$1',1), +('cs','http://cs.wiktionary.org/wiki/$1',1), 
+('csb','http://csb.wiktionary.org/wiki/$1',1), +('cy','http://cy.wiktionary.org/wiki/$1',1), +('da','http://da.wiktionary.org/wiki/$1',1), +('de','http://de.wiktionary.org/wiki/$1',1), +('dk','http://da.wiktionary.org/wiki/$1',1), +('dz','http://dz.wiktionary.org/wiki/$1',1), +('el','http://el.wiktionary.org/wiki/$1',1), +('en','http://en.wiktionary.org/wiki/$1',1), +('eo','http://eo.wiktionary.org/wiki/$1',1), +('es','http://es.wiktionary.org/wiki/$1',1), +('et','http://et.wiktionary.org/wiki/$1',1), +('eu','http://eu.wiktionary.org/wiki/$1',1), +('fa','http://fa.wiktionary.org/wiki/$1',1), +('fi','http://fi.wiktionary.org/wiki/$1',1), +('fj','http://fj.wiktionary.org/wiki/$1',1), +('fo','http://fo.wiktionary.org/wiki/$1',1), +('fr','http://fr.wiktionary.org/wiki/$1',1), +('fy','http://fy.wiktionary.org/wiki/$1',1), +('ga','http://ga.wiktionary.org/wiki/$1',1), +('gd','http://gd.wiktionary.org/wiki/$1',1), +('gl','http://gl.wiktionary.org/wiki/$1',1), +('gn','http://gn.wiktionary.org/wiki/$1',1), +('gu','http://gu.wiktionary.org/wiki/$1',1), +('gv','http://gv.wiktionary.org/wiki/$1',1), +('ha','http://ha.wiktionary.org/wiki/$1',1), +('he','http://he.wiktionary.org/wiki/$1',1), +('hi','http://hi.wiktionary.org/wiki/$1',1), +('hr','http://hr.wiktionary.org/wiki/$1',1), +('hu','http://hu.wiktionary.org/wiki/$1',1), +('hy','http://hy.wiktionary.org/wiki/$1',1), +('ia','http://ia.wiktionary.org/wiki/$1',1), +('id','http://id.wiktionary.org/wiki/$1',1), +('ik','http://ik.wiktionary.org/wiki/$1',1), +('io','http://io.wiktionary.org/wiki/$1',1), +('is','http://is.wiktionary.org/wiki/$1',1), +('it','http://it.wiktionary.org/wiki/$1',1), +('iu','http://iu.wiktionary.org/wiki/$1',1), +('ja','http://ja.wiktionary.org/wiki/$1',1), +('jv','http://jv.wiktionary.org/wiki/$1',1), +('ka','http://ka.wiktionary.org/wiki/$1',1), +('kk','http://kk.wiktionary.org/wiki/$1',1), +('kl','http://kl.wiktionary.org/wiki/$1',1), +('km','http://km.wiktionary.org/wiki/$1',1), 
+('kn','http://kn.wiktionary.org/wiki/$1',1), +('ko','http://ko.wiktionary.org/wiki/$1',1), +('ks','http://ks.wiktionary.org/wiki/$1',1), +('ku','http://ku.wiktionary.org/wiki/$1',1), +('ky','http://ky.wiktionary.org/wiki/$1',1), +('la','http://la.wiktionary.org/wiki/$1',1), +('lo','http://lo.wiktionary.org/wiki/$1',1), +('lt','http://lt.wiktionary.org/wiki/$1',1), +('lv','http://lv.wiktionary.org/wiki/$1',1), +('mg','http://mg.wiktionary.org/wiki/$1',1), +('mi','http://mi.wiktionary.org/wiki/$1',1), +('mk','http://mk.wiktionary.org/wiki/$1',1), +('ml','http://ml.wiktionary.org/wiki/$1',1), +('mn','http://mn.wiktionary.org/wiki/$1',1), +('mo','http://mo.wiktionary.org/wiki/$1',1), +('mr','http://mr.wiktionary.org/wiki/$1',1), +('ms','http://ms.wiktionary.org/wiki/$1',1), +('my','http://my.wiktionary.org/wiki/$1',1), +('na','http://na.wiktionary.org/wiki/$1',1), +('nah','http://nah.wiktionary.org/wiki/$1',1), +('nb', 'http://no.wiktionary.org/wiki/$1',1), +('nds','http://nds.wiktionary.org/wiki/$1',1), +('ne','http://ne.wiktionary.org/wiki/$1',1), +('nl','http://nl.wiktionary.org/wiki/$1',1), +('no','http://no.wiktionary.org/wiki/$1',1), +('oc','http://oc.wiktionary.org/wiki/$1',1), +('om','http://om.wiktionary.org/wiki/$1',1), +('or','http://or.wiktionary.org/wiki/$1',1), +('pa','http://pa.wiktionary.org/wiki/$1',1), +('pl','http://pl.wiktionary.org/wiki/$1',1), +('ps','http://ps.wiktionary.org/wiki/$1',1), +('pt','http://pt.wiktionary.org/wiki/$1',1), +('qu','http://qu.wiktionary.org/wiki/$1',1), +('rm','http://rm.wiktionary.org/wiki/$1',1), +('rn','http://rn.wiktionary.org/wiki/$1',1), +('ro','http://ro.wiktionary.org/wiki/$1',1), +('ru','http://ru.wiktionary.org/wiki/$1',1), +('rw','http://rw.wiktionary.org/wiki/$1',1), +('sa','http://sa.wiktionary.org/wiki/$1',1), +('sd','http://sd.wiktionary.org/wiki/$1',1), +('sg','http://sg.wiktionary.org/wiki/$1',1), +('sh','http://sh.wiktionary.org/wiki/$1',1), +('si','http://si.wiktionary.org/wiki/$1',1), 
+('sk','http://sk.wiktionary.org/wiki/$1',1), +('sl','http://sl.wiktionary.org/wiki/$1',1), +('sm','http://sm.wiktionary.org/wiki/$1',1), +('sn','http://sn.wiktionary.org/wiki/$1',1), +('so','http://so.wiktionary.org/wiki/$1',1), +('sq','http://sq.wiktionary.org/wiki/$1',1), +('sr','http://sr.wiktionary.org/wiki/$1',1), +('ss','http://ss.wiktionary.org/wiki/$1',1), +('st','http://st.wiktionary.org/wiki/$1',1), +('su','http://su.wiktionary.org/wiki/$1',1), +('sv','http://sv.wiktionary.org/wiki/$1',1), +('sw','http://sw.wiktionary.org/wiki/$1',1), +('ta','http://ta.wiktionary.org/wiki/$1',1), +('te','http://te.wiktionary.org/wiki/$1',1), +('tg','http://tg.wiktionary.org/wiki/$1',1), +('th','http://th.wiktionary.org/wiki/$1',1), +('ti','http://ti.wiktionary.org/wiki/$1',1), +('tk','http://tk.wiktionary.org/wiki/$1',1), +('tl','http://tl.wiktionary.org/wiki/$1',1), +('tn','http://tn.wiktionary.org/wiki/$1',1), +('to','http://to.wiktionary.org/wiki/$1',1), +('tokipona','http://tokipona.wiktionary.org/wiki/$1',1), +('tpi','http://tpi.wiktionary.org/wiki/$1',1), +('tr','http://tr.wiktionary.org/wiki/$1',1), +('ts','http://ts.wiktionary.org/wiki/$1',1), +('tt','http://tt.wiktionary.org/wiki/$1',1), +('tw','http://tw.wiktionary.org/wiki/$1',1), +('ug','http://ug.wiktionary.org/wiki/$1',1), +('uk','http://uk.wiktionary.org/wiki/$1',1), +('ur','http://ur.wiktionary.org/wiki/$1',1), +('uz','http://uz.wiktionary.org/wiki/$1',1), +('vi','http://vi.wiktionary.org/wiki/$1',1), +('vo','http://vo.wiktionary.org/wiki/$1',1), +('wa','http://wa.wiktionary.org/wiki/$1',1), +('wo','http://wo.wiktionary.org/wiki/$1',1), +('xh','http://xh.wiktionary.org/wiki/$1',1), +('yi','http://yi.wiktionary.org/wiki/$1',1), +('yo','http://yo.wiktionary.org/wiki/$1',1), +('za','http://za.wiktionary.org/wiki/$1',1), +('zh','http://zh.wiktionary.org/wiki/$1',1), +('zh-cn','http://zh.wiktionary.org/wiki/$1',1), +('zh-tw','http://zh.wiktionary.org/wiki/$1',1), +('zu','http://zu.wiktionary.org/wiki/$1',1); + 
|