diff options
Diffstat (limited to 'maintenance/postgres')
-rw-r--r-- | maintenance/postgres/compare_schemas.pl | 181 | ||||
-rw-r--r-- | maintenance/postgres/tables.sql | 156 | ||||
-rw-r--r-- | maintenance/postgres/wp_mysql2postgres.pl | 400 |
3 files changed, 687 insertions, 50 deletions
diff --git a/maintenance/postgres/compare_schemas.pl b/maintenance/postgres/compare_schemas.pl new file mode 100644 index 00000000..4a76b270 --- /dev/null +++ b/maintenance/postgres/compare_schemas.pl @@ -0,0 +1,181 @@ +#!/usr/bin/perl + +## Rough check that the base and postgres "tables.sql" are in sync +## Should be run from maintenance/postgres + +use strict; +use warnings; +use Data::Dumper; + +my @old = ("../tables.sql"); +my $new = "tables.sql"; + +## Read in exceptions and other metadata +my %ok; +while (<DATA>) { + next unless /^(\w+)\s*:\s*([^#]+)/; + my ($name,$val) = ($1,$2); + chomp $val; + if ($name eq 'RENAME') { + die "Invalid rename\n" unless $val =~ /(\w+)\s+(\w+)/; + $ok{OLD}{$1} = $2; + $ok{NEW}{$2} = $1; + next; + } + if ($name eq 'XFILE') { + push @old, $val; + next; + } + for (split(/\s+/ => $val)) { + $ok{$name}{$_} = 0; + } +} + +open my $newfh, "<", $new or die qq{Could not open $new: $!\n}; + +my $datatype = join '|' => qw( +bool +tinyint int bigint real float +tinytext mediumtext text char varchar +timestamp datetime +tinyblob mediumblob blob +); +$datatype .= q{|ENUM\([\"\w, ]+\)}; +$datatype = qr{($datatype)}; + +my $typeval = qr{(\(\d+\))?}; + +my $typeval2 = qr{ unsigned| binary| NOT NULL| NULL| auto_increment| default ['\-\d\w"]+| REFERENCES .+CASCADE}; + +my $indextype = join '|' => qw(INDEX KEY FULLTEXT), "PRIMARY KEY", "UNIQUE INDEX", "UNIQUE KEY"; +$indextype = qr{$indextype}; + +my $tabletype = qr{InnoDB|MyISAM|HEAP|HEAP MAX_ROWS=\d+}; + +my ($table,%old); +for my $old (@old) { + open my $oldfh, "<", $old or die qq{Could not open $old: $!\n}; + + while (<$oldfh>) { + next if /^\s*\-\-/ or /^\s+$/; + s/\s*\-\- [\w ]+$//; + chomp; + + if (/CREATE\s*TABLE/i) { + m{^CREATE TABLE /\*\$wgDBprefix\*/(\w+) \($} + or die qq{Invalid CREATE TABLE at line $. of $old\n}; + $table = $1; + $old{$table}{name}=$table; + } + elsif (/^\) TYPE=($tabletype);$/) { + $old{$table}{type}=$1; + } + elsif (/^ (\w+) $datatype$typeval$typeval2{0,3},?$/) { + $old{$table}{column}{$1} = $2; + } + elsif (/^ ($indextype)(?: (\w+))? \(([\w, \(\)]+)\),?$/) { + $old{$table}{lc $1."_name"} = $2 ? $2 : ""; + $old{$table}{lc $1."pk_target"} = $3; + } + else { + die "Cannot parse line $. of $old:\n$_\n"; + } + } + close $oldfh; +} + +$datatype = join '|' => qw( +SMALLINT INTEGER BIGINT NUMERIC SERIAL +TEXT CHAR VARCHAR +BYTEA +TIMESTAMPTZ +CIDR +); +$datatype = qr{($datatype)}; +my %new; +my ($infunction,$inview,$inrule) = (0,0,0); +while (<$newfh>) { + next if /^\s*\-\-/ or /^\s*$/; + s/\s*\-\- [\w ']+$//; + next if /^BEGIN;/ or /^SET / or /^COMMIT;/; + next if /^CREATE SEQUENCE/; + next if /^CREATE(?: UNIQUE)? INDEX/; + next if /^CREATE FUNCTION/; + next if /^CREATE TRIGGER/ or /^ FOR EACH ROW/; + next if /^INSERT INTO/ or /^ VALUES \(/; + next if /^ALTER TABLE/; + chomp; + + if (/^\$mw\$;?$/) { + $infunction = $infunction ? 0 : 1; + next; + } + next if $infunction; + + next if /^CREATE VIEW/ and $inview = 1; + if ($inview) { + /;$/ and $inview = 0; + next; + } + + next if /^CREATE RULE/ and $inrule = 1; + if ($inrule) { + /;$/ and $inrule = 0; + next; + } + + if (/^CREATE TABLE "?(\w+)"? \($/) { + $table = $1; + $new{$table}{name}=$table; + } + elsif (/^\);$/) { + } + elsif (/^ (\w+) +$datatype/) { + $new{$table}{column}{$1} = $2; + } + else { + die "Cannot parse line $. of $new:\n$_\n"; + } +} +close $newfh; + +## Old but not new +for my $t (sort keys %old) { + if (!exists $new{$t} and !exists $ok{OLD}{$t}) { + print "Table not in $new: $t\n"; + next; + } + next if exists $ok{OLD}{$t} and !$ok{OLD}{$t}; + my $newt = exists $ok{OLD}{$t} ? $ok{OLD}{$t} : $t; + my $oldcol = $old{$t}{column}; + my $newcol = $new{$newt}{column}; + for my $c (keys %$oldcol) { + if (!exists $newcol->{$c}) { + print "Column $t.$c not in new\n"; + next; + } + } + for my $c (keys %$newcol) { + if (!exists $oldcol->{$c}) { + print "Column $t.$c not in old\n"; + next; + } + } +} +## New but not old: +for (sort keys %new) { + if (!exists $old{$_} and !exists $ok{NEW}{$_}) { + print "Not in old: $_\n"; + next; + } +} + +__DATA__ +## Known exceptions +OLD: searchindex ## We use tsearch2 directly on the page table instead +OLD: archive ## This is a view due to the char(14) timestamp hack +RENAME: user mwuser ## Reserved word causing lots of problems +RENAME: text pagecontent ## Reserved word +NEW: archive2 ## The real archive table +NEW: mediawiki_version ## Just us, for now +XFILE: ../archives/patch-profiling.sql diff --git a/maintenance/postgres/tables.sql b/maintenance/postgres/tables.sql index 5481a394..9ac329d8 100644 --- a/maintenance/postgres/tables.sql +++ b/maintenance/postgres/tables.sql @@ -11,7 +11,7 @@ BEGIN; SET client_min_messages = 'ERROR'; CREATE SEQUENCE user_user_id_seq MINVALUE 0 START WITH 0; -CREATE TABLE "user" ( +CREATE TABLE mwuser ( -- replace reserved word 'user' user_id INTEGER NOT NULL PRIMARY KEY DEFAULT nextval('user_user_id_seq'), user_name TEXT NOT NULL UNIQUE, user_real_name TEXT, @@ -26,20 +26,20 @@ CREATE TABLE "user" ( user_touched TIMESTAMPTZ, user_registration TIMESTAMPTZ ); -CREATE INDEX user_email_token_idx ON "user" (user_email_token); +CREATE INDEX user_email_token_idx ON mwuser (user_email_token); -- Create a dummy user to satisfy fk contraints especially with revisions -INSERT INTO "user" VALUES - (DEFAULT,'Anonymous','',NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,now(),now()); +INSERT INTO mwuser + VALUES (DEFAULT,'Anonymous','',NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,now(),now()); CREATE TABLE user_groups ( - ug_user INTEGER NULL REFERENCES "user"(user_id) ON DELETE CASCADE, + ug_user INTEGER NULL REFERENCES mwuser(user_id) ON DELETE CASCADE, ug_group TEXT NOT NULL ); CREATE UNIQUE INDEX user_groups_unique ON user_groups (ug_user, ug_group); CREATE TABLE user_newtalk ( - user_id INTEGER NOT NULL REFERENCES "user"(user_id) ON DELETE CASCADE, + user_id INTEGER NOT NULL REFERENCES mwuser(user_id) ON DELETE CASCADE, user_ip CIDR NULL ); CREATE INDEX user_newtalk_id_idx ON user_newtalk (user_id); @@ -69,18 +69,24 @@ CREATE INDEX page_project_title ON page (page_title) WHERE page_namespace = CREATE INDEX page_random_idx ON page (page_random); CREATE INDEX page_len_idx ON page (page_len); --- Create a dummy page to satisfy fk contraints where a page_id of "0" is added -INSERT INTO page (page_id,page_namespace,page_title,page_random,page_latest,page_len) -VALUES (0,0,'',0.0,0,0); +CREATE FUNCTION page_deleted() RETURNS TRIGGER LANGUAGE plpgsql AS +$mw$ +BEGIN +DELETE FROM recentchanges WHERE rc_namespace = OLD.page_namespace AND rc_title = OLD.page_title; +RETURN NULL; +END; +$mw$; +CREATE TRIGGER page_deleted AFTER DELETE ON page + FOR EACH ROW EXECUTE PROCEDURE page_deleted(); CREATE SEQUENCE rev_rev_id_val; CREATE TABLE revision ( rev_id INTEGER NOT NULL UNIQUE DEFAULT nextval('rev_rev_id_val'), - rev_page INTEGER NULL REFERENCES page (page_id) ON DELETE SET NULL, + rev_page INTEGER NULL REFERENCES page (page_id) ON DELETE CASCADE, rev_text_id INTEGER NULL, -- FK rev_comment TEXT, - rev_user INTEGER NOT NULL REFERENCES "user"(user_id), + rev_user INTEGER NOT NULL REFERENCES mwuser(user_id), rev_user_text TEXT NOT NULL, rev_timestamp TIMESTAMPTZ NOT NULL, rev_minor_edit CHAR NOT NULL DEFAULT '0', @@ -93,19 +99,19 @@ CREATE INDEX rev_user_text_idx ON revision (rev_user_text); CREATE SEQUENCE text_old_id_val; -CREATE TABLE "text" ( +CREATE TABLE pagecontent ( -- replaces reserved word 'text' old_id INTEGER NOT NULL PRIMARY KEY DEFAULT nextval('text_old_id_val'), old_text TEXT, old_flags TEXT ); -CREATE TABLE archive ( +CREATE TABLE archive2 ( ar_namespace SMALLINT NOT NULL, ar_title TEXT NOT NULL, ar_text TEXT, ar_comment TEXT, - ar_user INTEGER NULL REFERENCES "user"(user_id) ON DELETE SET NULL, + ar_user INTEGER NULL REFERENCES mwuser(user_id) ON DELETE SET NULL, ar_user_text TEXT NOT NULL, ar_timestamp TIMESTAMPTZ NOT NULL, ar_minor_edit CHAR NOT NULL DEFAULT '0', @@ -113,7 +119,22 @@ CREATE TABLE archive ( ar_rev_id INTEGER, ar_text_id INTEGER ); -CREATE INDEX archive_name_title_timestamp ON archive (ar_namespace,ar_title,ar_timestamp); +CREATE INDEX archive_name_title_timestamp ON archive2 (ar_namespace,ar_title,ar_timestamp); + +-- This is the easiest way to work around the char(15) timestamp hack without modifying PHP code +CREATE VIEW archive AS +SELECT + ar_namespace, ar_title, ar_text, ar_comment, ar_user, ar_user_text, + ar_minor_edit, ar_flags, ar_rev_id, ar_text_id, + TO_CHAR(ar_timestamp, 'YYYYMMDDHH24MISS') AS ar_timestamp +FROM archive2; + +CREATE RULE archive_insert AS ON INSERT TO archive +DO INSTEAD INSERT INTO archive2 VALUES ( + NEW.ar_namespace, NEW.ar_title, NEW.ar_text, NEW.ar_comment, NEW.ar_user, NEW.ar_user_text, + TO_DATE(NEW.ar_timestamp, 'YYYYMMDDHH24MISS'), + NEW.ar_minor_edit, NEW.ar_flags, NEW.ar_rev_id, NEW.ar_text_id +); CREATE TABLE pagelinks ( @@ -121,7 +142,7 @@ CREATE TABLE pagelinks ( pl_namespace SMALLINT NOT NULL, pl_title TEXT NOT NULL ); -CREATE UNIQUE INDEX pagelink_unique ON pagelinks (pl_namespace,pl_title,pl_from); +CREATE UNIQUE INDEX pagelink_unique ON pagelinks (pl_from,pl_namespace,pl_title); CREATE TABLE templatelinks ( tl_from INTEGER NOT NULL REFERENCES page(page_id) ON DELETE CASCADE, @@ -180,16 +201,18 @@ CREATE TABLE hitcounter ( CREATE SEQUENCE ipblocks_ipb_id_val; CREATE TABLE ipblocks ( - ipb_id INTEGER NOT NULL PRIMARY KEY DEFAULT nextval('ipblocks_ipb_id_val'), - ipb_address CIDR NULL, - ipb_user INTEGER NULL REFERENCES "user"(user_id) ON DELETE SET NULL, - ipb_by INTEGER NOT NULL REFERENCES "user"(user_id) ON DELETE CASCADE, - ipb_reason TEXT NOT NULL, - ipb_timestamp TIMESTAMPTZ NOT NULL, - ipb_auto CHAR NOT NULL DEFAULT '0', - ipb_expiry TIMESTAMPTZ NOT NULL, - ipb_range_start TEXT, - ipb_range_end TEXT + ipb_id INTEGER NOT NULL PRIMARY KEY DEFAULT nextval('ipblocks_ipb_id_val'), + ipb_address CIDR NULL, + ipb_user INTEGER NULL REFERENCES mwuser(user_id) ON DELETE SET NULL, + ipb_by INTEGER NOT NULL REFERENCES mwuser(user_id) ON DELETE CASCADE, + ipb_reason TEXT NOT NULL, + ipb_timestamp TIMESTAMPTZ NOT NULL, + ipb_auto CHAR NOT NULL DEFAULT '0', + ipb_anon_only CHAR NOT NULL DEFAULT '0', + ipb_create_account CHAR NOT NULL DEFAULT '1', + ipb_expiry TIMESTAMPTZ NOT NULL, + ipb_range_start TEXT, + ipb_range_end TEXT ); CREATE INDEX ipb_address ON ipblocks (ipb_address); CREATE INDEX ipb_user ON ipblocks (ipb_user); @@ -198,16 +221,16 @@ CREATE INDEX ipb_range ON ipblocks (ipb_range_start,ipb_range_end); CREATE TABLE image ( img_name TEXT NOT NULL PRIMARY KEY, - img_size SMALLINT NOT NULL, - img_width SMALLINT NOT NULL, - img_height SMALLINT NOT NULL, + img_size INTEGER NOT NULL, + img_width INTEGER NOT NULL, + img_height INTEGER NOT NULL, img_metadata TEXT, img_bits SMALLINT, img_media_type TEXT, img_major_mime TEXT DEFAULT 'unknown', img_minor_mime TEXT DEFAULT 'unknown', img_description TEXT NOT NULL, - img_user INTEGER NULL REFERENCES "user"(user_id) ON DELETE SET NULL, + img_user INTEGER NULL REFERENCES mwuser(user_id) ON DELETE SET NULL, img_user_text TEXT NOT NULL, img_timestamp TIMESTAMPTZ ); @@ -217,12 +240,12 @@ CREATE INDEX img_timestamp_idx ON image (img_timestamp); CREATE TABLE oldimage ( oi_name TEXT NOT NULL REFERENCES image(img_name), oi_archive_name TEXT NOT NULL, - oi_size SMALLINT NOT NULL, - oi_width SMALLINT NOT NULL, - oi_height SMALLINT NOT NULL, + oi_size INTEGER NOT NULL, + oi_width INTEGER NOT NULL, + oi_height INTEGER NOT NULL, oi_bits SMALLINT NOT NULL, oi_description TEXT, - oi_user INTEGER NULL REFERENCES "user"(user_id) ON DELETE SET NULL, + oi_user INTEGER NULL REFERENCES mwuser(user_id) ON DELETE SET NULL, oi_user_text TEXT NOT NULL, oi_timestamp TIMESTAMPTZ NOT NULL ); @@ -235,7 +258,7 @@ CREATE TABLE filearchive ( fa_archive_name TEXT, fa_storage_group VARCHAR(16), fa_storage_key CHAR(64), - fa_deleted_user INTEGER NULL REFERENCES "user"(user_id) ON DELETE SET NULL, + fa_deleted_user INTEGER NULL REFERENCES mwuser(user_id) ON DELETE SET NULL, fa_deleted_timestamp TIMESTAMPTZ NOT NULL, fa_deleted_reason TEXT, fa_size SMALLINT NOT NULL, @@ -247,7 +270,7 @@ CREATE TABLE filearchive ( fa_major_mime TEXT DEFAULT 'unknown', fa_minor_mime TEXT DEFAULT 'unknown', fa_description TEXT NOT NULL, - fa_user INTEGER NULL REFERENCES "user"(user_id) ON DELETE SET NULL, + fa_user INTEGER NULL REFERENCES mwuser(user_id) ON DELETE SET NULL, fa_user_text TEXT NOT NULL, fa_timestamp TIMESTAMPTZ ); @@ -262,7 +285,7 @@ CREATE TABLE recentchanges ( rc_id INTEGER NOT NULL PRIMARY KEY DEFAULT nextval('rc_rc_id_seq'), rc_timestamp TIMESTAMPTZ NOT NULL, rc_cur_time TIMESTAMPTZ NOT NULL, - rc_user INTEGER NULL REFERENCES "user"(user_id) ON DELETE SET NULL, + rc_user INTEGER NULL REFERENCES mwuser(user_id) ON DELETE SET NULL, rc_user_text TEXT NOT NULL, rc_namespace SMALLINT NOT NULL, rc_title TEXT NOT NULL, @@ -270,7 +293,7 @@ CREATE TABLE recentchanges ( rc_minor CHAR NOT NULL DEFAULT '0', rc_bot CHAR NOT NULL DEFAULT '0', rc_new CHAR NOT NULL DEFAULT '0', - rc_cur_id INTEGER NOT NULL REFERENCES page(page_id), + rc_cur_id INTEGER NULL REFERENCES page(page_id) ON DELETE SET NULL, rc_this_oldid INTEGER NOT NULL, rc_last_oldid INTEGER NOT NULL, rc_type CHAR NOT NULL DEFAULT '0', @@ -287,7 +310,7 @@ CREATE INDEX rc_ip ON recentchanges (rc_ip); CREATE TABLE watchlist ( - wl_user INTEGER NOT NULL REFERENCES "user"(user_id) ON DELETE CASCADE, + wl_user INTEGER NOT NULL REFERENCES mwuser(user_id) ON DELETE CASCADE, wl_namespace SMALLINT NOT NULL DEFAULT 0, wl_title TEXT NOT NULL, wl_notificationtimestamp TIMESTAMPTZ @@ -343,7 +366,7 @@ CREATE TABLE logging ( log_type TEXT NOT NULL, log_action TEXT NOT NULL, log_timestamp TIMESTAMPTZ NOT NULL, - log_user INTEGER REFERENCES "user"(user_id) ON DELETE SET NULL, + log_user INTEGER REFERENCES mwuser(user_id) ON DELETE SET NULL, log_namespace SMALLINT NOT NULL, log_title TEXT NOT NULL, log_comment TEXT, @@ -383,38 +406,71 @@ CREATE FUNCTION ts2_page_title() RETURNS TRIGGER LANGUAGE plpgsql AS $mw$ BEGIN IF TG_OP = 'INSERT' THEN - NEW.titlevector = to_tsvector(NEW.page_title); + NEW.titlevector = to_tsvector('default',NEW.page_title); ELSIF NEW.page_title != OLD.page_title THEN - NEW.titlevector := to_tsvector(NEW.page_title); + NEW.titlevector := to_tsvector('default',NEW.page_title); END IF; RETURN NEW; END; $mw$; CREATE TRIGGER ts2_page_title BEFORE INSERT OR UPDATE ON page -FOR EACH ROW EXECUTE PROCEDURE ts2_page_title(); + FOR EACH ROW EXECUTE PROCEDURE ts2_page_title(); -ALTER TABLE text ADD textvector tsvector; -CREATE INDEX ts2_page_text ON text USING gist(textvector); +ALTER TABLE pagecontent ADD textvector tsvector; +CREATE INDEX ts2_page_text ON pagecontent USING gist(textvector); CREATE FUNCTION ts2_page_text() RETURNS TRIGGER LANGUAGE plpgsql AS $mw$ BEGIN IF TG_OP = 'INSERT' THEN - NEW.textvector = to_tsvector(NEW.old_text); + NEW.textvector = to_tsvector('default',NEW.old_text); ELSIF NEW.old_text != OLD.old_text THEN - NEW.textvector := to_tsvector(NEW.old_text); + NEW.textvector := to_tsvector('default',NEW.old_text); END IF; RETURN NEW; END; $mw$; -CREATE TRIGGER ts2_page_text BEFORE INSERT OR UPDATE ON text -FOR EACH ROW EXECUTE PROCEDURE ts2_page_text(); +CREATE TRIGGER ts2_page_text BEFORE INSERT OR UPDATE ON pagecontent + FOR EACH ROW EXECUTE PROCEDURE ts2_page_text(); -CREATE OR REPLACE FUNCTION add_interwiki (TEXT,INT,CHAR) RETURNS INT LANGUAGE SQL AS +CREATE FUNCTION add_interwiki (TEXT,INT,CHAR) RETURNS INT LANGUAGE SQL AS $mw$ INSERT INTO interwiki (iw_prefix, iw_url, iw_local) VALUES ($1,$2,$3); SELECT 1; $mw$; + +-- This table is not used unless profiling is turned on +CREATE TABLE profiling ( + pf_count INTEGER NOT NULL DEFAULT 0, + pf_time NUMERIC(18,10) NOT NULL DEFAULT 0, + pf_name TEXT NOT NULL, + pf_server TEXT NULL +); +CREATE UNIQUE INDEX pf_name_server ON profiling (pf_name, pf_server); + + +CREATE TABLE mediawiki_version ( + type TEXT NOT NULL, + mw_version TEXT NOT NULL, + notes TEXT NULL, + + pg_version TEXT NULL, + pg_dbname TEXT NULL, + pg_user TEXT NULL, + pg_port TEXT NULL, + mw_schema TEXT NULL, + ts2_schema TEXT NULL, + ctype TEXT NULL, + + sql_version TEXT NULL, + sql_date TEXT NULL, + cdate TIMESTAMPTZ NOT NULL DEFAULT now() +); + +INSERT INTO mediawiki_version (type,mw_version,sql_version,sql_date) + VALUES ('Creation','??','$LastChangedRevision: 16747 $','$LastChangedDate: 2006-10-02 17:55:26 -0700 (Mon, 02 Oct 2006) $'); + + COMMIT; diff --git a/maintenance/postgres/wp_mysql2postgres.pl b/maintenance/postgres/wp_mysql2postgres.pl new file mode 100644 index 00000000..788d9e0b --- /dev/null +++ b/maintenance/postgres/wp_mysql2postgres.pl @@ -0,0 +1,400 @@ +#!/usr/bin/perl + +## Convert data from a MySQL mediawiki database into a Postgres mediawiki database +## svn: $Id: wp_mysql2postgres.pl 16088 2006-08-16 01:12:20Z greg $ + +use strict; +use warnings; +use Data::Dumper; +use Getopt::Long; + +use vars qw(%table %tz %special @torder $COM); +my $VERSION = "1.0"; + +## The following options can be changed via command line arguments: +my $MYSQLDB = 'wikidb'; +my $MYSQLUSER = 'wikiuser'; + +## If the following are zero-length, we omit their arguments entirely: +my $MYSQLHOST = ''; +my $MYSQLPASSWORD = ''; +my $MYSQLSOCKET = ''; + +## Name of the dump file created +my $MYSQLDUMPFILE = "mediawiki_upgrade.pg"; + +## How verbose should this script be (0, 1, or 2) +my $verbose = 0; + +my $USAGE = " +Usage: $0 [OPTION]... +Convert a MediaWiki schema from MySQL to Postgres +Example: $0 --db=wikidb --user=wikiuser --pass=sushi +Options: + db Name of the MySQL database + user MySQL database username + pass MySQL database password + host MySQL database host + socket MySQL database socket + verbose Verbosity, increases with multiple uses +"; + +GetOptions + ( + "db=s" => \$MYSQLDB, + "user=s" => \$MYSQLUSER, + "pass=s" => \$MYSQLPASSWORD, + "host=s" => \$MYSQLHOST, + "socket=s" => \$MYSQLSOCKET, + "verbose+" => \$verbose + ); + +## The Postgres schema file: should not be changed +my $PG_SCHEMA = "tables.sql"; + +## What version we default to when we can't parse the old schema +my $MW_DEFAULT_VERSION = '1.8'; + +## Try and find a working version of mysqldump +$verbose and warn "Locating the mysqldump executable\n"; +my @MYSQLDUMP = ("/usr/local/bin/mysqldump", "/usr/bin/mysqldump"); +my $MYSQLDUMP; +for my $mytry (@MYSQLDUMP) { + next if ! -e $mytry; + -x $mytry or die qq{Not an executable file: "$mytry"\n}; + my $version = qx{$mytry -V}; + $version =~ /^mysqldump\s+Ver\s+\d+/ or die qq{Program at "$mytry" does not act like mysqldump\n}; + $MYSQLDUMP = $mytry; +} +$MYSQLDUMP or die qq{Could not find the mysqldump program\n}; + +## Flags we use for mysqldump +my @MYSQLDUMPARGS = qw( +--skip-lock-tables +--complete-insert +--skip-extended-insert +--skip-add-drop-table +--skip-add-locks +--skip-disable-keys +--skip-set-charset +--skip-comments +--skip-quote-names +); + + +$verbose and warn "Checking that mysqldump can handle our flags\n"; +## Make sure this version can handle all the flags we want. +## Combine with user dump below +my $MYSQLDUMPARGS = join " " => @MYSQLDUMPARGS; +## Argh. Any way to make this work on Win32? +my $version = qx{$MYSQLDUMP $MYSQLDUMPARGS 2>&1}; +if ($version =~ /unknown option/) { + die qq{Sorry, you need to use a newer version of the mysqldump program than the one at "$MYSQLDUMP"\n}; +} + +push @MYSQLDUMPARGS, "--user=$MYSQLUSER"; +length $MYSQLPASSWORD and push @MYSQLDUMPARGS, "--password=$MYSQLPASSWORD"; +length $MYSQLHOST and push @MYSQLDUMPARGS, "--host=$MYSQLHOST"; + +## Open the dump file to hold the mysqldump output +open my $mdump, "+>", $MYSQLDUMPFILE or die qq{Could not open "$MYSQLDUMPFILE": $!\n}; +$verbose and warn qq{Writing file "$MYSQLDUMPFILE"\n}; + +open my $mfork2, "-|" or exec $MYSQLDUMP, @MYSQLDUMPARGS, "--no-data", $MYSQLDB; +my $oldselect = select $mdump; + +print while <$mfork2>; + +## Slurp in the current schema +my $current_schema; +seek $mdump, 0, 0; +{ + local $/; + $current_schema = <$mdump>; +} +seek $mdump, 0, 0; +truncate $mdump, 0; + +warn qq{Trying to determine database version...\n} if $verbose; + +my $current_version = 0; +if ($current_schema =~ /CREATE TABLE \S+cur /) { + $current_version = '1.3'; +} +elsif ($current_schema =~ /CREATE TABLE \S+brokenlinks /) { + $current_version = '1.4'; +} +elsif ($current_schema !~ /CREATE TABLE \S+templatelinks /) { + $current_version = '1.5'; +} +elsif ($current_schema !~ /CREATE TABLE \S+validate /) { + $current_version = '1.6'; +} +elsif ($current_schema !~ /ipb_auto tinyint/) { + $current_version = '1.7'; +} +else { + $current_version = '1.8'; +} + +if (!$current_version) { + warn qq{WARNING! Could not figure out the old version, assuming MediaWiki $MW_DEFAULT_VERSION\n}; + $current_version = $MW_DEFAULT_VERSION; +} + +## Check for a table prefix: +my $table_prefix = ''; +if ($current_version =~ /CREATE TABLE (\S+)archive /) { + $table_prefix = $1; +} + +warn qq{Old schema is from MediaWiki version $current_version\n} if $verbose; +warn qq{Table prefix is "$table_prefix"\n} if $verbose and length $table_prefix; + +$verbose and warn qq{Writing file "$MYSQLDUMPFILE"\n}; +my $now = scalar localtime(); +my $conninfo = ''; +$MYSQLHOST and $conninfo .= "\n-- host $MYSQLHOST"; +$MYSQLSOCKET and $conninfo .= "\n-- socket $MYSQLSOCKET"; + +print qq{ +-- Dump of MySQL Mediawiki tables for import into a Postgres Mediawiki schema +-- Performed by the program: $0 +-- Version: $VERSION (subversion }.q{$LastChangedRevision: 16088 $}.qq{) +-- Author: Greg Sabino Mullane <greg\@turnstep.com> Comments welcome +-- +-- This file was created: $now +-- Executable used: $MYSQLDUMP +-- Connection information: +-- database: $MYSQLDB +-- user: $MYSQLUSER$conninfo + +-- This file can be imported manually with psql like so: +-- psql -p port# -h hostname -U username -f $MYSQLDUMPFILE databasename +-- This will overwrite any existing MediaWiki information, so be careful + + +}; + +warn qq{Reading in the Postgres schema information\n} if $verbose; +open my $schema, "<", $PG_SCHEMA + or die qq{Could not open "$PG_SCHEMA": make sure this script is run from maintenance/postgres/\n}; +my $t; +while (<$schema>) { + if (/CREATE TABLE\s+(\S+)/) { + $t = $1; + $table{$t}={}; + } + elsif (/^ +(\w+)\s+TIMESTAMP/) { + $tz{$t}{$1}++; + } + elsif (/REFERENCES\s*([^( ]+)/) { + my $ref = $1; + exists $table{$ref} or die qq{No parent table $ref found for $t\n}; + $table{$t}{$ref}++; + } +} +close $schema; + +## Read in special cases and table/version information +$verbose and warn qq{Reading in schema exception information\n}; +my %version_tables; +while (<DATA>) { + if (/^VERSION\s+(\d+\.\d+):\s+(.+)/) { + my $list = join '|' => split /\s+/ => $2; + $version_tables{$1} = qr{\b$list\b}; + next; + } + next unless /^(\w+)\s*(.*)/; + $special{$1} = $2||''; + $special{$2} = $1 if length $2; +} + +## Determine the order of tables based on foreign key constraints +$verbose and warn qq{Figuring out order of tables to dump\n}; +my %dumped; +my $bail = 0; +{ + my $found=0; + T: for my $t (sort keys %table) { + next if exists $dumped{$t} and $dumped{$t} >= 1; + $found=1; + for my $dep (sort keys %{$table{$t}}) { + next T if ! exists $dumped{$dep} or $dumped{$dep} < 0; + } + $dumped{$t} = -1 if ! exists $dumped{$t}; + ## Skip certain tables that are not imported + next if exists $special{$t} and !$special{$t}; + push @torder, $special{$t} || $t; + } + last if !$found; + push @torder, "---"; + for (values %dumped) { $_+=2; } + die "Too many loops!\n" if $bail++ > 1000; + redo; +} + +## Prepare the Postgres database for the move +$verbose and warn qq{Writing Postgres transformation information\n}; + +print "\n-- Empty out all existing tables\n"; +$verbose and warn qq{Writing truncates to empty existing tables\n}; +for my $t (@torder) { + next if $t eq '---'; + my $tname = $special{$t}||$t; + printf qq{TRUNCATE TABLE %-18s CASCADE;\n}, qq{"$tname"}; +} +print "\n\n"; + +print qq{-- Rename the "text" table\n}; +print qq{ALTER TABLE pagecontent RENAME TO "text";\n\n}; + +print qq{-- Allow rc_ip to contain empty string, will convert at end\n}; +print qq{ALTER TABLE recentchanges ALTER rc_ip TYPE text USING host(rc_ip);\n\n}; + +print "-- Changing all timestamp fields to handle raw integers\n"; +for my $t (sort keys %tz) { + next if $t eq "archive2"; + for my $c (sort keys %{$tz{$t}}) { + printf "ALTER TABLE %-18s ALTER %-25s TYPE TEXT;\n", $t, $c; + } +} +print "\n"; + +print qq{ +INSERT INTO page VALUES (0,-1,'Dummy Page','',0,0,0,default,now(),0,10); +}; + +## If we have a table _prefix, we need to temporarily rename all of our Postgres +## tables temporarily for the import. Perhaps consider making this an auto-schema +## thing in the future. +if (length $table_prefix) { + print qq{\n\n-- Temporarily renaming tables to accomodate the table_prefix "$table_prefix"\n\n}; + for my $t (@torder) { + next if $t eq '---'; + my $tname = $special{$t}||$t; + printf qq{ALTER TABLE %-18s RENAME TO "${table_prefix}$tname"\n}, qq{"$tname"}; + } +} + + +## Try and dump the ill-named "user" table: +## We do this table alone because "user" is a reserved word. +print qq{ + +SET escape_string_warning TO 'off'; +\\o /dev/null + +-- Postgres uses a table name of "mwuser" instead of "user" + +-- Create a dummy user to satisfy fk contraints especially with revisions +SELECT setval('user_user_id_seq',0,'false'); +INSERT INTO mwuser + VALUES (DEFAULT,'Anonymous','',NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,now(),now()); + +}; + +push @MYSQLDUMPARGS, "--no-create-info"; + +$verbose and warn qq{Dumping "user" table\n}; +$verbose > 2 and warn Dumper \@MYSQLDUMPARGS; +my $usertable = "${table_prefix}user"; +open my $mfork, "-|" or exec $MYSQLDUMP, @MYSQLDUMPARGS, $MYSQLDB, $usertable; +## Unfortunately, there is no easy way to catch errors +my $numusers = 0; +while (<$mfork>) { + ++$numusers and print if s/INSERT INTO $usertable/INSERT INTO mwuser/; +} +close $mfork; +if ($numusers < 1) { + warn qq{No users found, probably a connection error.\n}; + print qq{ERROR: No users found, connection failed, or table "$usertable" does not exist. Dump aborted.\n}; + close $mdump; + exit; +} +print "\n-- Users loaded: $numusers\n\n-- Loading rest of the mediawiki schema:\n"; + +warn qq{Dumping all other tables from the MySQL schema\n} if $verbose; + +## Dump the rest of the tables, in chunks based on constraints +## We do not need the user table: +my @dumplist = grep { $_ ne 'user'} @torder; +my @alist; +{ + undef @alist; + PICKATABLE: { + my $tname = shift @dumplist; + ## XXX Make this dynamic below + for my $ver (sort {$b <=> $a } keys %version_tables) { + redo PICKATABLE if $tname =~ $version_tables{$ver}; + } + $tname = "${table_prefix}$tname" if length $table_prefix; + push @alist, $tname; + pop @alist and last if index($alist[-1],'---') >= 0; + redo if @dumplist; + } + + ## Dump everything else + open my $mfork2, "-|" or exec $MYSQLDUMP, @MYSQLDUMPARGS, $MYSQLDB, @alist; + print while <$mfork2>; + close $mfork2; + warn qq{Finished dumping from MySQL\n} if $verbose; + + redo if @dumplist; +} + +warn qq{Writing information to return Postgres database to normal\n} if $verbose; +print qq{ALTER TABLE "${table_prefix}text" RENAME TO pagecontent;\n}; +print qq{ALTER TABLE ${table_prefix}recentchanges ALTER rc_ip TYPE cidr USING\n}; +print qq{ CASE WHEN rc_ip = '' THEN NULL ELSE rc_ip::cidr END;\n}; + +## Return tables to their original names if a table prefix was used. +if (length $table_prefix) { + print qq{\n\n-- Renaming tables by removing table prefix "$table_prefix"\n\n}; + my $maxsize = 18; + for (@torder) { + $maxsize = length "$_$table_prefix" if length "$_$table_prefix" > $maxsize; + } + for my $t (@torder) { + next if $t eq '---' or $t eq 'text'; + my $tname = $special{$t}||$t; + printf qq{ALTER TABLE %*s RENAME TO "$tname"\n}, $maxsize+1, qq{"${table_prefix}$tname"}; + } +} + +print qq{\n\n--Returning timestamps to normal\n}; +for my $t (sort keys %tz) { + next if $t eq "archive2"; + for my $c (sort keys %{$tz{$t}}) { + printf "ALTER TABLE %-18s ALTER %-25s TYPE timestamptz\n". + " USING TO_TIMESTAMP($c,'YYYYMMDDHHMISS');\n", $t, $c; + } +} + +## Finally, make a record in the mediawiki_version table about this import +print qq{ +INSERT INTO mediawiki_version (type,mw_version,notes) VALUES ('MySQL import','??', +'Imported from file created on $now. Old version: $current_version'); +}; + + +print "\\o\n\n-- End of dump\n\n"; +select $oldselect; +close $mdump; +exit; + + +__DATA__ +## Known remappings: either indicate the MySQL name, +## or leave blank if it should be skipped +pagecontent text +mwuser user +mediawiki_version +archive2 +profiling +objectcache + +## Which tables to ignore depending on the version +VERSION 1.5: trackback +VERSION 1.6: externallinks job templatelinks transcache +VERSION 1.7: filearchive langlinks querycache_info |