diff options
Diffstat (limited to 'maintenance/postgres')
19 files changed, 609 insertions, 46 deletions
diff --git a/maintenance/postgres/archives/patch-archive-ar_deleted.sql b/maintenance/postgres/archives/patch-archive-ar_deleted.sql new file mode 100644 index 00000000..08bc1e37 --- /dev/null +++ b/maintenance/postgres/archives/patch-archive-ar_deleted.sql @@ -0,0 +1 @@ +ALTER TABLE archive ADD ar_deleted INTEGER NOT NULL DEFAULT '0'; diff --git a/maintenance/postgres/archives/patch-archive2.sql b/maintenance/postgres/archives/patch-archive2.sql new file mode 100644 index 00000000..fa900cbf --- /dev/null +++ b/maintenance/postgres/archives/patch-archive2.sql @@ -0,0 +1,15 @@ +ALTER TABLE archive RENAME to archive2; +CREATE VIEW archive AS +SELECT + ar_namespace, ar_title, ar_text, ar_comment, ar_user, ar_user_text, + ar_minor_edit, ar_flags, ar_rev_id, ar_text_id, + TO_CHAR(ar_timestamp, 'YYYYMMDDHH24MISS') AS ar_timestamp +FROM archive2; + +CREATE RULE archive_insert AS ON INSERT TO archive +DO INSTEAD INSERT INTO archive2 VALUES ( + NEW.ar_namespace, NEW.ar_title, NEW.ar_text, NEW.ar_comment, NEW.ar_user, NEW.ar_user_text, + TO_DATE(NEW.ar_timestamp, 'YYYYMMDDHH24MISS'), + NEW.ar_minor_edit, NEW.ar_flags, NEW.ar_rev_id, NEW.ar_text_id +); + diff --git a/maintenance/postgres/archives/patch-archive_delete.sql b/maintenance/postgres/archives/patch-archive_delete.sql new file mode 100644 index 00000000..4a864c3b --- /dev/null +++ b/maintenance/postgres/archives/patch-archive_delete.sql @@ -0,0 +1,5 @@ +CREATE RULE archive_delete AS ON DELETE TO archive +DO INSTEAD DELETE FROM archive2 WHERE + archive2.ar_title = OLD.ar_title AND + archive2.ar_namespace = OLD.ar_namespace AND + archive2.ar_rev_id = OLD.ar_rev_id; diff --git a/maintenance/postgres/archives/patch-archive_insert.sql b/maintenance/postgres/archives/patch-archive_insert.sql new file mode 100644 index 00000000..ca13d2a2 --- /dev/null +++ b/maintenance/postgres/archives/patch-archive_insert.sql @@ -0,0 +1,6 @@ +CREATE OR REPLACE RULE archive_insert AS ON INSERT TO archive +DO INSTEAD INSERT INTO archive2 VALUES ( + NEW.ar_namespace, NEW.ar_title, NEW.ar_text, NEW.ar_comment, NEW.ar_user, NEW.ar_user_text, + TO_TIMESTAMP(NEW.ar_timestamp, 'YYYYMMDDHH24MISS'), + NEW.ar_minor_edit, NEW.ar_flags, NEW.ar_rev_id, NEW.ar_text_id +); diff --git a/maintenance/postgres/archives/patch-mediawiki_version.sql b/maintenance/postgres/archives/patch-mediawiki_version.sql new file mode 100644 index 00000000..811b38a1 --- /dev/null +++ b/maintenance/postgres/archives/patch-mediawiki_version.sql @@ -0,0 +1,18 @@ +CREATE TABLE mediawiki_version ( + type TEXT NOT NULL, + mw_version TEXT NOT NULL, + notes TEXT NULL, + + pg_version TEXT NULL, + pg_dbname TEXT NULL, + pg_user TEXT NULL, + pg_port TEXT NULL, + mw_schema TEXT NULL, + ts2_schema TEXT NULL, + ctype TEXT NULL, + + sql_version TEXT NULL, + sql_date TEXT NULL, + cdate TIMESTAMPTZ NOT NULL DEFAULT now() +); + diff --git a/maintenance/postgres/archives/patch-mwuser.sql b/maintenance/postgres/archives/patch-mwuser.sql new file mode 100644 index 00000000..3984703a --- /dev/null +++ b/maintenance/postgres/archives/patch-mwuser.sql @@ -0,0 +1 @@ +ALTER TABLE "user" RENAME TO mwuser; diff --git a/maintenance/postgres/archives/patch-page_deleted.sql b/maintenance/postgres/archives/patch-page_deleted.sql new file mode 100644 index 00000000..5b0782cb --- /dev/null +++ b/maintenance/postgres/archives/patch-page_deleted.sql @@ -0,0 +1,11 @@ +CREATE FUNCTION page_deleted() RETURNS TRIGGER LANGUAGE plpgsql AS +$mw$ +BEGIN +DELETE FROM recentchanges WHERE rc_namespace = OLD.page_namespace AND rc_title = OLD.page_title; +RETURN NULL; +END; +$mw$; + +CREATE TRIGGER page_deleted AFTER DELETE ON page + FOR EACH ROW EXECUTE PROCEDURE page_deleted(); + diff --git a/maintenance/postgres/archives/patch-page_restrictions.sql b/maintenance/postgres/archives/patch-page_restrictions.sql new file mode 100644 index 00000000..1faa14a9 --- /dev/null +++ b/maintenance/postgres/archives/patch-page_restrictions.sql @@ -0,0 +1,10 @@ +CREATE TABLE page_restrictions ( + pr_page INTEGER NULL REFERENCES page (page_id) ON DELETE CASCADE, + pr_type TEXT NOT NULL, + pr_level TEXT NOT NULL, + pr_cascade SMALLINT NOT NULL, + pr_user INTEGER NULL, + pr_expiry TIMESTAMPTZ NULL +); +ALTER TABLE page_restrictions ADD CONSTRAINT page_restrictions_pk PRIMARY KEY (pr_page,pr_type); + diff --git a/maintenance/postgres/archives/patch-pagecontent.sql b/maintenance/postgres/archives/patch-pagecontent.sql new file mode 100644 index 00000000..c3651f92 --- /dev/null +++ b/maintenance/postgres/archives/patch-pagecontent.sql @@ -0,0 +1 @@ +ALTER TABLE "text" RENAME TO pagecontent; diff --git a/maintenance/postgres/archives/patch-profiling.sql b/maintenance/postgres/archives/patch-profiling.sql new file mode 100644 index 00000000..1c4dce4e --- /dev/null +++ b/maintenance/postgres/archives/patch-profiling.sql @@ -0,0 +1,7 @@ +CREATE TABLE profiling ( + pf_count INTEGER NOT NULL DEFAULT 0, + pf_time NUMERIC(18,10) NOT NULL DEFAULT 0, + pf_name TEXT NOT NULL, + pf_server TEXT NULL +); +CREATE UNIQUE INDEX pf_name_server ON profiling (pf_name, pf_server); diff --git a/maintenance/postgres/archives/patch-querycachetwo.sql b/maintenance/postgres/archives/patch-querycachetwo.sql new file mode 100644 index 00000000..cb70cd89 --- /dev/null +++ b/maintenance/postgres/archives/patch-querycachetwo.sql @@ -0,0 +1,12 @@ +CREATE TABLE querycachetwo ( + qcc_type TEXT NOT NULL, + qcc_value SMALLINT NOT NULL DEFAULT 0, + qcc_namespace INTEGER NOT NULL DEFAULT 0, + qcc_title TEXT NOT NULL DEFAULT '', + qcc_namespacetwo INTEGER NOT NULL DEFAULT 0, + qcc_titletwo TEXT NOT NULL DEFAULT '' +); +CREATE INDEX querycachetwo_type_value ON querycachetwo (qcc_type, qcc_value); +CREATE INDEX querycachetwo_title ON querycachetwo (qcc_type,qcc_namespace,qcc_title); +CREATE INDEX querycachetwo_titletwo ON querycachetwo (qcc_type,qcc_namespacetwo,qcc_titletwo); + diff --git a/maintenance/postgres/archives/patch-rc_cur_id-not-null.sql b/maintenance/postgres/archives/patch-rc_cur_id-not-null.sql new file mode 100644 index 00000000..2ca7edbf --- /dev/null +++ b/maintenance/postgres/archives/patch-rc_cur_id-not-null.sql @@ -0,0 +1 @@ +ALTER TABLE recentchanges ALTER rc_cur_id DROP NOT NULL; diff --git a/maintenance/postgres/archives/patch-redirect.sql b/maintenance/postgres/archives/patch-redirect.sql new file mode 100644 index 00000000..d2922d3b --- /dev/null +++ b/maintenance/postgres/archives/patch-redirect.sql @@ -0,0 +1,7 @@ +CREATE TABLE redirect ( + rd_from INTEGER NOT NULL REFERENCES page(page_id) ON DELETE CASCADE, + rd_namespace SMALLINT NOT NULL, + rd_title TEXT NOT NULL +); +CREATE INDEX redirect_ns_title ON redirect (rd_namespace,rd_title,rd_from); + diff --git a/maintenance/postgres/archives/patch-remove-archive2.sql b/maintenance/postgres/archives/patch-remove-archive2.sql new file mode 100644 index 00000000..20bac385 --- /dev/null +++ b/maintenance/postgres/archives/patch-remove-archive2.sql @@ -0,0 +1,3 @@ +DROP VIEW archive; +ALTER TABLE archive2 RENAME TO archive; +ALTER TABLE archive ADD ar_len INTEGER; diff --git a/maintenance/postgres/archives/patch-rev_text_id_idx.sql b/maintenance/postgres/archives/patch-rev_text_id_idx.sql new file mode 100644 index 00000000..036c0be3 --- /dev/null +++ b/maintenance/postgres/archives/patch-rev_text_id_idx.sql @@ -0,0 +1 @@ +CREATE INDEX rev_text_id_idx ON revision (rev_text_id); diff --git a/maintenance/postgres/archives/patch-revision_rev_user_fkey.sql b/maintenance/postgres/archives/patch-revision_rev_user_fkey.sql new file mode 100644 index 00000000..721aadd5 --- /dev/null +++ b/maintenance/postgres/archives/patch-revision_rev_user_fkey.sql @@ -0,0 +1,4 @@ +ALTER TABLE revision DROP CONSTRAINT revision_rev_user_fkey; +ALTER TABLE revision ADD CONSTRAINT revision_rev_user_fkey + FOREIGN KEY (rev_user) REFERENCES mwuser(user_id) ON DELETE RESTRICT; + diff --git a/maintenance/postgres/compare_schemas.pl b/maintenance/postgres/compare_schemas.pl index cdbbdf41..ce045fef 100644 --- a/maintenance/postgres/compare_schemas.pl +++ b/maintenance/postgres/compare_schemas.pl @@ -7,8 +7,8 @@ use strict; use warnings; use Data::Dumper; -my @old = ("../tables.sql", "../mysql5/tables.sql", "../mysql5/tables-binary.sql"); -my $new = "tables.sql"; +my @old = ('../tables.sql'); +my $new = 'tables.sql'; my @xfile; ## Read in exceptions and other metadata @@ -27,7 +27,7 @@ while (<DATA>) { push @xfile, $val; next; } - for (split(/\s+/ => $val)) { + for (split /\s+/ => $val) { $ok{$name}{$_} = 0; } } @@ -46,16 +46,16 @@ my $typeval = qr{(\(\d+\))?}; my $typeval2 = qr{ unsigned| binary| NOT NULL| NULL| auto_increment| default ['\-\d\w"]+| REFERENCES .+CASCADE}; -my $indextype = join '|' => qw(INDEX KEY FULLTEXT), "PRIMARY KEY", "UNIQUE INDEX", "UNIQUE KEY"; +my $indextype = join '|' => qw(INDEX KEY FULLTEXT), 'PRIMARY KEY', 'UNIQUE INDEX', 'UNIQUE KEY'; $indextype = qr{$indextype}; my $engine = qr{TYPE|ENGINE}; -my $tabletype = qr{InnoDB|MyISAM|HEAP|HEAP MAX_ROWS=\d+}; +my $tabletype = qr{InnoDB|MyISAM|HEAP|HEAP MAX_ROWS=\d+|InnoDB MAX_ROWS=\d+ AVG_ROW_LENGTH=\d+}; my $charset = qr{utf8|binary}; -open my $newfh, "<", $new or die qq{Could not open $new: $!\n}; +open my $newfh, '<', $new or die qq{Could not open $new: $!\n}; my ($table,%old); @@ -83,7 +83,7 @@ sub parse_sql { my $oldfile = shift; - open my $oldfh, "<", $oldfile or die qq{Could not open $oldfile: $!\n}; + open my $oldfh, '<', $oldfile or die qq{Could not open $oldfile: $!\n}; my %info; while (<$oldfh>) { @@ -97,6 +97,10 @@ sub parse_sql { $table = $1; $info{$table}{name}=$table; } + elsif (m#^\) /\*\$wgDBTableOptions\*/#) { + $info{$table}{engine} = 'TYPE'; + $info{$table}{type} = 'variable'; + } elsif (/^\) ($engine)=($tabletype);$/) { $info{$table}{engine}=$1; $info{$table}{type}=$2; @@ -110,8 +114,8 @@ sub parse_sql { $info{$table}{column}{$1} = $2; } elsif (/^ ($indextype)(?: (\w+))? \(([\w, \(\)]+)\),?$/) { - $info{$table}{lc $1."_name"} = $2 ? $2 : ""; - $info{$table}{lc $1."pk_target"} = $3; + $info{$table}{lc $1.'_name'} = $2 ? $2 : ''; + $info{$table}{lc $1.'pk_target'} = $3; } else { die "Cannot parse line $. of $oldfile:\n$_\n"; @@ -209,13 +213,13 @@ for my $t (sort keys %{$old{$oldfile}}) { my $newcol = $new{$newt}{column}; for my $c (keys %$oldcol) { if (!exists $newcol->{$c}) { - print "Column $t.$c not in new\n"; + print "Column $t.$c not in $new\n"; next; } } for my $c (keys %$newcol) { if (!exists $oldcol->{$c}) { - print "Column $t.$c not in old\n"; + print "Column $t.$c not in $oldfile\n"; next; } } @@ -223,7 +227,7 @@ for my $t (sort keys %{$old{$oldfile}}) { ## New but not old: for (sort keys %new) { if (!exists $old{$oldfile}{$_} and !exists $ok{NEW}{$_}) { - print "Not in old: $_\n"; + print "Not in $oldfile: $_\n"; next; } } @@ -235,9 +239,7 @@ for (sort keys %new) { __DATA__ ## Known exceptions OLD: searchindex ## We use tsearch2 directly on the page table instead -OLD: archive ## This is a view due to the char(14) timestamp hack RENAME: user mwuser ## Reserved word causing lots of problems RENAME: text pagecontent ## Reserved word -NEW: archive2 ## The real archive table NEW: mediawiki_version ## Just us, for now XFILE: ../archives/patch-profiling.sql diff --git a/maintenance/postgres/mediawiki_mysql2postgres.pl b/maintenance/postgres/mediawiki_mysql2postgres.pl new file mode 100644 index 00000000..733af08f --- /dev/null +++ b/maintenance/postgres/mediawiki_mysql2postgres.pl @@ -0,0 +1,444 @@ +#!/usr/bin/perl + +## Convert data from a MySQL mediawiki database into a Postgres mediawiki database +## svn: $Id: mediawiki_mysql2postgres.pl 21254 2007-04-14 02:10:03Z greg $ + +use strict; +use warnings; +use Data::Dumper; +use Getopt::Long; + +use vars qw(%table %tz %special @torder $COM); +my $VERSION = '1.2'; + +## The following options can be changed via command line arguments: +my $MYSQLDB = ''; +my $MYSQLUSER = ''; + +## If the following are zero-length, we omit their arguments entirely: +my $MYSQLHOST = ''; +my $MYSQLPASSWORD = ''; +my $MYSQLSOCKET = ''; + +## Name of the dump file created +my $MYSQLDUMPFILE = 'mediawiki_upgrade.pg'; + +## How verbose should this script be (0, 1, or 2) +my $verbose = 0; + +my $help = 0; + +my $USAGE = " +Usage: $0 --db=<dbname> --user=<user> [OPTION]... +Example: $0 --db=wikidb --user=wikiuser --pass=sushi + +Converts a MediaWiki schema from MySQL to Postgres +Options: + db Name of the MySQL database + user MySQL database username + pass MySQL database password + host MySQL database host + socket MySQL database socket + verbose Verbosity, increases with multiple uses +"; + +GetOptions + ( + 'db=s' => \$MYSQLDB, + 'user=s' => \$MYSQLUSER, + 'pass=s' => \$MYSQLPASSWORD, + 'host=s' => \$MYSQLHOST, + 'socket=s' => \$MYSQLSOCKET, + 'verbose+' => \$verbose, + 'help' => \$help, + ); + +die $USAGE + if ! length $MYSQLDB + or ! length $MYSQLUSER + or $help; + +## The Postgres schema file: should not be changed +my $PG_SCHEMA = 'tables.sql'; + +## What version we default to when we can't parse the old schema +my $MW_DEFAULT_VERSION = 110; + +## Try and find a working version of mysqldump +$verbose and warn "Locating the mysqldump executable\n"; +my @MYSQLDUMP = ('/usr/local/bin/mysqldump', '/usr/bin/mysqldump'); +my $MYSQLDUMP; +for my $mytry (@MYSQLDUMP) { + next if ! -e $mytry; + -x $mytry or die qq{Not an executable file: "$mytry"\n}; + my $version = qx{$mytry -V}; + $version =~ /^mysqldump\s+Ver\s+\d+/ or die qq{Program at "$mytry" does not act like mysqldump\n}; + $MYSQLDUMP = $mytry; +} +$MYSQLDUMP or die qq{Could not find the mysqldump program\n}; + +## Flags we use for mysqldump +my @MYSQLDUMPARGS = qw( +--skip-lock-tables +--complete-insert +--skip-extended-insert +--skip-add-drop-table +--skip-add-locks +--skip-disable-keys +--skip-set-charset +--skip-comments +--skip-quote-names +); + + +$verbose and warn "Checking that mysqldump can handle our flags\n"; +## Make sure this version can handle all the flags we want. +## Combine with user dump below +my $MYSQLDUMPARGS = join ' ' => @MYSQLDUMPARGS; +## Argh. Any way to make this work on Win32? +my $version = qx{$MYSQLDUMP $MYSQLDUMPARGS 2>&1}; +if ($version =~ /unknown option/) { + die qq{Sorry, you need to use a newer version of the mysqldump program than the one at "$MYSQLDUMP"\n}; +} + +push @MYSQLDUMPARGS, "--user=$MYSQLUSER"; +length $MYSQLPASSWORD and push @MYSQLDUMPARGS, "--password=$MYSQLPASSWORD"; +length $MYSQLHOST and push @MYSQLDUMPARGS, "--host=$MYSQLHOST"; + +## Open the dump file to hold the mysqldump output +open my $mdump, '+>', $MYSQLDUMPFILE or die qq{Could not open "$MYSQLDUMPFILE": $!\n}; +print qq{Writing file "$MYSQLDUMPFILE"\n}; + +open my $mfork2, '-|' or exec $MYSQLDUMP, @MYSQLDUMPARGS, '--no-data', $MYSQLDB; +my $oldselect = select $mdump; + +print while <$mfork2>; + +## Slurp in the current schema +my $current_schema; +seek $mdump, 0, 0; +{ + local $/; + $current_schema = <$mdump>; +} +seek $mdump, 0, 0; +truncate $mdump, 0; + +warn qq{Trying to determine database version...\n} if $verbose; + +my $current_version = 0; +if ($current_schema =~ /CREATE TABLE \S+cur /) { + $current_version = 103; +} +elsif ($current_schema =~ /CREATE TABLE \S+brokenlinks /) { + $current_version = 104; +} +elsif ($current_schema !~ /CREATE TABLE \S+templatelinks /) { + $current_version = 105; +} +elsif ($current_schema !~ /CREATE TABLE \S+validate /) { + $current_version = 106; +} +elsif ($current_schema !~ /ipb_auto tinyint/) { + $current_version = 107; +} +elsif ($current_schema !~ /CREATE TABLE \S+profiling /) { + $current_version = 108; +} +elsif ($current_schema !~ /CREATE TABLE \S+querycachetwo /) { + $current_version = 109; +} +else { + $current_version = $MW_DEFAULT_VERSION; +} + +if (!$current_version) { + warn qq{WARNING! Could not figure out the old version, assuming MediaWiki $MW_DEFAULT_VERSION\n}; + $current_version = $MW_DEFAULT_VERSION; +} + +## Check for a table prefix: +my $table_prefix = ''; +if ($current_schema =~ /CREATE TABLE (\S+)querycache /) { + $table_prefix = $1; +} + +warn qq{Old schema is from MediaWiki version $current_version\n} if $verbose; +warn qq{Table prefix is "$table_prefix"\n} if $verbose and length $table_prefix; + +$verbose and warn qq{Writing file "$MYSQLDUMPFILE"\n}; +my $now = scalar localtime; +my $conninfo = ''; +$MYSQLHOST and $conninfo .= "\n-- host $MYSQLHOST"; +$MYSQLSOCKET and $conninfo .= "\n-- socket $MYSQLSOCKET"; + +print qq{ +-- Dump of MySQL Mediawiki tables for import into a Postgres Mediawiki schema +-- Performed by the program: $0 +-- Version: $VERSION (subversion }.q{$LastChangedRevision: 21254 $}.qq{) +-- Author: Greg Sabino Mullane <greg\@turnstep.com> Comments welcome +-- +-- This file was created: $now +-- Executable used: $MYSQLDUMP +-- Connection information: +-- database: $MYSQLDB +-- user: $MYSQLUSER$conninfo + +-- This file can be imported manually with psql like so: +-- psql -p port# -h hostname -U username -f $MYSQLDUMPFILE databasename +-- This will overwrite any existing MediaWiki information, so be careful + +}; + +## psql specific stuff +print q{ +\\set ON_ERROR_STOP +BEGIN; +SET client_min_messages = 'WARNING'; +SET timezone = 'GMT'; +}; + +warn qq{Reading in the Postgres schema information\n} if $verbose; +open my $schema, '<', $PG_SCHEMA + or die qq{Could not open "$PG_SCHEMA": make sure this script is run from maintenance/postgres/\n}; +my $t; +while (<$schema>) { + if (/CREATE TABLE\s+(\S+)/) { + $t = $1; + $table{$t}={}; + $verbose > 1 and warn qq{ Found table $t\n}; + } + elsif (/^ +(\w+)\s+TIMESTAMP/) { + $tz{$t}{$1}++; + $verbose > 1 and warn qq{ Got a timestamp for column $1\n}; + } + elsif (/REFERENCES\s*([^( ]+)/) { + my $ref = $1; + exists $table{$ref} or die qq{No parent table $ref found for $t\n}; + $table{$t}{$ref}++; + } +} +close $schema or die qq{Could not close "$PG_SCHEMA": $!\n}; + +## Read in special cases and table/version information +$verbose and warn qq{Reading in schema exception information\n}; +my %version_tables; +while (<DATA>) { + if (/^VERSION\s+(\d+\.\d+):\s+(.+)/) { + my $list = join '|' => split /\s+/ => $2; + $version_tables{$1} = qr{\b$list\b}; + next; + } + next unless /^(\w+)\s*(.*)/; + $special{$1} = $2||''; + $special{$2} = $1 if length $2; +} + +## Determine the order of tables based on foreign key constraints +$verbose and warn qq{Figuring out order of tables to dump\n}; +my %dumped; +my $bail = 0; +{ + my $found=0; + T: for my $t (sort keys %table) { + next if exists $dumped{$t} and $dumped{$t} >= 1; + $found=1; + for my $dep (sort keys %{$table{$t}}) { + next T if ! exists $dumped{$dep} or $dumped{$dep} < 0; + } + $dumped{$t} = -1 if ! exists $dumped{$t}; + ## Skip certain tables that are not imported + next if exists $special{$t} and !$special{$t}; + push @torder, $special{$t} || $t; + } + last if !$found; + push @torder, '---'; + for (values %dumped) { $_+=2; } + die "Too many loops!\n" if $bail++ > 1000; + redo; +} + +## Prepare the Postgres database for the move +$verbose and warn qq{Writing Postgres transformation information\n}; + +print "\n-- Empty out all existing tables\n"; +$verbose and warn qq{Writing truncates to empty existing tables\n}; + + +for my $t (@torder, 'objectcache', 'querycache') { + next if $t eq '---'; + my $tname = $special{$t}||$t; + printf qq{TRUNCATE TABLE %-20s CASCADE;\n}, qq{"$tname"}; +} +print "\n\n"; + +print qq{-- Temporarily rename pagecontent to "text"\n}; +print qq{ALTER TABLE pagecontent RENAME TO "text";\n\n}; + +print qq{-- Allow rc_ip to contain empty string, will convert at end\n}; +print qq{ALTER TABLE recentchanges ALTER rc_ip TYPE text USING host(rc_ip);\n\n}; + +print "-- Changing all timestamp fields to handle raw integers\n"; +for my $t (sort keys %tz) { + next if $t eq 'archive2'; + for my $c (sort keys %{$tz{$t}}) { + printf "ALTER TABLE %-18s ALTER %-25s TYPE TEXT;\n", $t, $c; + } +} +print "\n"; + +print q{ +INSERT INTO page VALUES (0,-1,'Dummy Page','',0,0,0,default,now(),0,10); +}; + +## If we have a table _prefix, we need to temporarily rename all of our Postgres +## tables temporarily for the import. Perhaps consider making this an auto-schema +## thing in the future. +if (length $table_prefix) { + print qq{\n\n-- Temporarily renaming tables to accomodate the table_prefix "$table_prefix"\n\n}; + for my $t (@torder) { + next if $t eq '---'; + my $tname = $special{$t}||$t; + printf qq{ALTER TABLE %-18s RENAME TO "${table_prefix}$tname"\n}, qq{"$tname"}; + } +} + + +## Try and dump the ill-named "user" table: +## We do this table alone because "user" is a reserved word. +print q{ + +SET escape_string_warning TO 'off'; +\\o /dev/null + +-- Postgres uses a table name of "mwuser" instead of "user" + +-- Create a dummy user to satisfy fk contraints especially with revisions +SELECT setval('user_user_id_seq',0,'false'); +INSERT INTO mwuser + VALUES (DEFAULT,'Anonymous','',NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,now(),now()); + +}; + +push @MYSQLDUMPARGS, '--no-create-info'; + +$verbose and warn qq{Dumping "user" table\n}; +$verbose > 2 and warn Dumper \@MYSQLDUMPARGS; +my $usertable = "${table_prefix}user"; +open my $mfork, '-|' or exec $MYSQLDUMP, @MYSQLDUMPARGS, $MYSQLDB, $usertable; +## Unfortunately, there is no easy way to catch errors +my $numusers = 0; +while (<$mfork>) { + ++$numusers and print if s/INSERT INTO $usertable/INSERT INTO mwuser/; +} +close $mfork; +if ($numusers < 1) { + warn qq{No users found, probably a connection error.\n}; + print qq{ERROR: No users found, connection failed, or table "$usertable" does not exist. Dump aborted.\n}; + close $mdump or die qq{Could not close "$MYSQLDUMPFILE": $!\n}; + exit; +} +print "\n-- Users loaded: $numusers\n\n-- Loading rest of the mediawiki schema:\n"; + +warn qq{Dumping all other tables from the MySQL schema\n} if $verbose; + +## Dump the rest of the tables, in chunks based on constraints +## We do not need the user table: +my @dumplist = grep { $_ ne 'user'} @torder; +my @alist; +{ + undef @alist; + PICKATABLE: { + my $tname = shift @dumplist; + ## XXX Make this dynamic below + for my $ver (sort {$b <=> $a } keys %version_tables) { + redo PICKATABLE if $tname =~ $version_tables{$ver}; + } + $tname = "${table_prefix}$tname" if length $table_prefix; + next if $tname !~ /^\w/; + push @alist, $tname; + $verbose and warn " $tname...\n"; + pop @alist and last if index($alist[-1],'---') >= 0; + redo if @dumplist; + } + + ## Dump everything else + open my $mfork2, '-|' or exec $MYSQLDUMP, @MYSQLDUMPARGS, $MYSQLDB, @alist; + print while <$mfork2>; + close $mfork2; + warn qq{Finished dumping from MySQL\n} if $verbose; + + redo if @dumplist; +} + +warn qq{Writing information to return Postgres database to normal\n} if $verbose; +print qq{ALTER TABLE "${table_prefix}text" RENAME TO pagecontent;\n}; +print qq{ALTER TABLE ${table_prefix}recentchanges ALTER rc_ip TYPE cidr USING\n}; +print qq{ CASE WHEN rc_ip = '' THEN NULL ELSE rc_ip::cidr END;\n}; + +## Return tables to their original names if a table prefix was used. +if (length $table_prefix) { + print qq{\n\n-- Renaming tables by removing table prefix "$table_prefix"\n\n}; + my $maxsize = 18; + for (@torder) { + $maxsize = length "$_$table_prefix" if length "$_$table_prefix" > $maxsize; + } + for my $t (@torder) { + next if $t eq '---' or $t eq 'text'; + my $tname = $special{$t}||$t; + printf qq{ALTER TABLE %*s RENAME TO "$tname"\n}, $maxsize+1, qq{"${table_prefix}$tname"}; + } +} + +print qq{\n\n--Returning timestamps to normal\n}; +for my $t (sort keys %tz) { + next if $t eq 'archive2'; + for my $c (sort keys %{$tz{$t}}) { + printf "ALTER TABLE %-18s ALTER %-25s TYPE timestamptz\n". + " USING TO_TIMESTAMP($c,'YYYYMMDDHHMISS');\n", $t, $c; + } +} + +## Reset sequences +print q{ +SELECT setval('filearchive_fa_id_seq', 1+coalesce(max(fa_id) ,0),false) FROM filearchive; +SELECT setval('ipblocks_ipb_id_val', 1+coalesce(max(ipb_id) ,0),false) FROM ipblocks; +SELECT setval('job_job_id_seq', 1+coalesce(max(job_id) ,0),false) FROM job; +SELECT setval('log_log_id_seq', 1+coalesce(max(log_id) ,0),false) FROM logging; +SELECT setval('page_page_id_seq', 1+coalesce(max(page_id),0),false) FROM page; +SELECT setval('pr_id_val', 1+coalesce(max(pr_id) ,0),false) FROM page_restrictions; +SELECT setval('rc_rc_id_seq', 1+coalesce(max(rc_id) ,0),false) FROM recentchanges; +SELECT setval('rev_rev_id_val', 1+coalesce(max(rev_id) ,0),false) FROM revision; +SELECT setval('text_old_id_val', 1+coalesce(max(old_id) ,0),false) FROM pagecontent; +SELECT setval('trackbacks_tb_id_seq', 1+coalesce(max(tb_id) ,0),false) FROM trackbacks; +SELECT setval('user_user_id_seq', 1+coalesce(max(user_id),0),false) FROM mwuser; +}; + +## Finally, make a record in the mediawiki_version table about this import +print qq{ +INSERT INTO mediawiki_version (type,mw_version,notes) VALUES ('MySQL import','??', +'Imported from file created on $now. Old version: $current_version'); +}; + +print "COMMIT;\n\\o\n\n-- End of dump\n\n"; +select $oldselect; +close $mdump or die qq{Could not close "$MYSQLDUMPFILE": $!\n}; +exit; + + +__DATA__ +## Known remappings: either indicate the MySQL name, +## or leave blank if it should be skipped +pagecontent text +mwuser user +mediawiki_version +archive2 +profiling +objectcache + +## Which tables to ignore depending on the version +VERSION 1.5: trackback +VERSION 1.6: externallinks job templatelinks transcache +VERSION 1.7: filearchive langlinks querycache_info +VERSION 1.9: querycachetwo page_restrictions redirect + diff --git a/maintenance/postgres/tables.sql b/maintenance/postgres/tables.sql index e6cbbe2a..e5dd129b 100644 --- a/maintenance/postgres/tables.sql +++ b/maintenance/postgres/tables.sql @@ -5,7 +5,7 @@ -- For information about each table, please see the notes in maintenance/tables.sql -- Please make sure all dollar-quoting uses $mw$ at the start of the line -- We can't use SERIAL everywhere: the sequence names are hard-coded into the PHP --- TODO: Change CHAR to BOOL +-- TODO: Change CHAR to BOOL (still needed as CHAR due to some PHP code) BEGIN; SET client_min_messages = 'ERROR'; @@ -42,7 +42,7 @@ CREATE UNIQUE INDEX user_groups_unique ON user_groups (ug_user, ug_group); CREATE TABLE user_newtalk ( user_id INTEGER NOT NULL REFERENCES mwuser(user_id) ON DELETE CASCADE, - user_ip CIDR NULL + user_ip TEXT NULL ); CREATE INDEX user_newtalk_id_idx ON user_newtalk (user_id); CREATE INDEX user_newtalk_ip_idx ON user_newtalk (user_ip); @@ -92,9 +92,12 @@ CREATE TABLE revision ( rev_user_text TEXT NOT NULL, rev_timestamp TIMESTAMPTZ NOT NULL, rev_minor_edit CHAR NOT NULL DEFAULT '0', - rev_deleted CHAR NOT NULL DEFAULT '0' + rev_deleted CHAR NOT NULL DEFAULT '0', + rev_len INTEGER NULL, + rev_parent_id INTEGER NULL ); CREATE UNIQUE INDEX revision_unique ON revision (rev_page, rev_id); +CREATE INDEX rev_text_id_idx ON revision (rev_text_id); CREATE INDEX rev_timestamp_idx ON revision (rev_timestamp); CREATE INDEX rev_user_idx ON revision (rev_user); CREATE INDEX rev_user_text_idx ON revision (rev_user_text); @@ -108,7 +111,20 @@ CREATE TABLE pagecontent ( -- replaces reserved word 'text' ); -CREATE TABLE archive2 ( +CREATE SEQUENCE pr_id_val; +CREATE TABLE page_restrictions ( + pr_id INTEGER NOT NULL UNIQUE DEFAULT nextval('pr_id_val'), + pr_page INTEGER NULL REFERENCES page (page_id) ON DELETE CASCADE, + pr_type TEXT NOT NULL, + pr_level TEXT NOT NULL, + pr_cascade SMALLINT NOT NULL, + pr_user INTEGER NULL, + pr_expiry TIMESTAMPTZ NULL +); +ALTER TABLE page_restrictions ADD CONSTRAINT page_restrictions_pk PRIMARY KEY (pr_page,pr_type); + + +CREATE TABLE archive ( ar_namespace SMALLINT NOT NULL, ar_title TEXT NOT NULL, ar_text TEXT, @@ -119,24 +135,11 @@ CREATE TABLE archive2 ( ar_minor_edit CHAR NOT NULL DEFAULT '0', ar_flags TEXT, ar_rev_id INTEGER, - ar_text_id INTEGER -); -CREATE INDEX archive_name_title_timestamp ON archive2 (ar_namespace,ar_title,ar_timestamp); - --- This is the easiest way to work around the char(15) timestamp hack without modifying PHP code -CREATE VIEW archive AS -SELECT - ar_namespace, ar_title, ar_text, ar_comment, ar_user, ar_user_text, - ar_minor_edit, ar_flags, ar_rev_id, ar_text_id, - TO_CHAR(ar_timestamp, 'YYYYMMDDHH24MISS') AS ar_timestamp -FROM archive2; - -CREATE RULE archive_insert AS ON INSERT TO archive -DO INSTEAD INSERT INTO archive2 VALUES ( - NEW.ar_namespace, NEW.ar_title, NEW.ar_text, NEW.ar_comment, NEW.ar_user, NEW.ar_user_text, - TO_DATE(NEW.ar_timestamp, 'YYYYMMDDHH24MISS'), - NEW.ar_minor_edit, NEW.ar_flags, NEW.ar_rev_id, NEW.ar_text_id + ar_text_id INTEGER, + ar_deleted INTEGER NOT NULL DEFAULT 0, + ar_len INTEGER NULL ); +CREATE INDEX archive_name_title_timestamp ON archive (ar_namespace,ar_title,ar_timestamp); CREATE TABLE redirect ( @@ -223,7 +226,8 @@ CREATE TABLE ipblocks ( ipb_enable_autoblock CHAR NOT NULL DEFAULT '1', ipb_expiry TIMESTAMPTZ NOT NULL, ipb_range_start TEXT, - ipb_range_end TEXT + ipb_range_end TEXT, + ipb_deleted INTEGER NOT NULL DEFAULT 0 ); CREATE INDEX ipb_address ON ipblocks (ipb_address); CREATE INDEX ipb_user ON ipblocks (ipb_user); @@ -283,7 +287,8 @@ CREATE TABLE filearchive ( fa_description TEXT NOT NULL, fa_user INTEGER NULL REFERENCES mwuser(user_id) ON DELETE SET NULL, fa_user_text TEXT NOT NULL, - fa_timestamp TIMESTAMPTZ + fa_timestamp TIMESTAMPTZ, + fa_deleted INTEGER NOT NULL DEFAULT 0 ); CREATE INDEX fa_name_time ON filearchive (fa_name, fa_timestamp); CREATE INDEX fa_dupe ON filearchive (fa_storage_group, fa_storage_key); @@ -313,7 +318,12 @@ CREATE TABLE recentchanges ( rc_patrolled CHAR NOT NULL DEFAULT '0', rc_ip CIDR, rc_old_len INTEGER, - rc_new_len INTEGER + rc_new_len INTEGER, + rc_deleted INTEGER NOT NULL DEFAULT 0, + rc_logid INTEGER NOT NULL DEFAULT 0, + rc_log_type TEXT, + rc_log_action TEXT, + rc_params TEXT ); CREATE INDEX rc_timestamp ON recentchanges (rc_timestamp); CREATE INDEX rc_namespace_title ON recentchanges (rc_namespace, rc_title); @@ -332,8 +342,8 @@ CREATE UNIQUE INDEX wl_user_namespace_title ON watchlist (wl_namespace, wl_title CREATE TABLE math ( - math_inputhash TEXT NOT NULL UNIQUE, - math_outputhash TEXT NOT NULL, + math_inputhash BYTEA NOT NULL UNIQUE, + math_outputhash BYTEA NOT NULL, math_html_conservativeness SMALLINT NOT NULL, math_html TEXT, math_mathml TEXT @@ -373,7 +383,6 @@ CREATE INDEX querycachetwo_type_value ON querycachetwo (qcc_type, qcc_value); CREATE INDEX querycachetwo_title ON querycachetwo (qcc_type,qcc_namespace,qcc_title); CREATE INDEX querycachetwo_titletwo ON querycachetwo (qcc_type,qcc_namespacetwo,qcc_titletwo); - CREATE TABLE objectcache ( keyname CHAR(255) UNIQUE, value BYTEA NOT NULL DEFAULT '', @@ -388,7 +397,9 @@ CREATE TABLE transcache ( ); +CREATE SEQUENCE log_log_id_seq; CREATE TABLE logging ( + log_id INTEGER NOT NULL PRIMARY KEY DEFAULT nextval('log_log_id_seq'), log_type TEXT NOT NULL, log_action TEXT NOT NULL, log_timestamp TIMESTAMPTZ NOT NULL, @@ -396,7 +407,8 @@ CREATE TABLE logging ( log_namespace SMALLINT NOT NULL, log_title TEXT NOT NULL, log_comment TEXT, - log_params TEXT + log_params TEXT, + log_deleted INTEGER NOT NULL DEFAULT 0 ); CREATE INDEX logging_type_name ON logging (log_type, log_timestamp); CREATE INDEX logging_user_time ON logging (log_timestamp, log_user); @@ -427,7 +439,6 @@ CREATE INDEX job_cmd_namespace_title ON job (job_cmd, job_namespace, job_title); -- Tsearch2 2 stuff. Will fail if we don't have proper access to the tsearch2 tables ALTER TABLE page ADD titlevector tsvector; -CREATE INDEX ts2_page_title ON page USING gist(titlevector); CREATE FUNCTION ts2_page_title() RETURNS TRIGGER LANGUAGE plpgsql AS $mw$ BEGIN @@ -445,7 +456,6 @@ CREATE TRIGGER ts2_page_title BEFORE INSERT OR UPDATE ON page ALTER TABLE pagecontent ADD textvector tsvector; -CREATE INDEX ts2_page_text ON pagecontent USING gist(textvector); CREATE FUNCTION ts2_page_text() RETURNS TRIGGER LANGUAGE plpgsql AS $mw$ BEGIN @@ -461,6 +471,11 @@ $mw$; CREATE TRIGGER ts2_page_text BEFORE INSERT OR UPDATE ON pagecontent FOR EACH ROW EXECUTE PROCEDURE ts2_page_text(); +-- These are added by the setup script due to version compatibility issues +-- If using 8.1, switch from "gin" to "gist" +-- CREATE INDEX ts2_page_title ON page USING gin(titlevector); +-- CREATE INDEX ts2_page_text ON pagecontent USING gin(textvector); + CREATE FUNCTION add_interwiki (TEXT,INT,CHAR) RETURNS INT LANGUAGE SQL AS $mw$ INSERT INTO interwiki (iw_prefix, iw_url, iw_local) VALUES ($1,$2,$3); @@ -496,7 +511,6 @@ CREATE TABLE mediawiki_version ( ); INSERT INTO mediawiki_version (type,mw_version,sql_version,sql_date) - VALUES ('Creation','??','$LastChangedRevision: 18326 $','$LastChangedDate: 2006-12-14 07:34:56 -0800 (Thu, 14 Dec 2006) $'); + VALUES ('Creation','??','$LastChangedRevision: 20687 $','$LastChangedDate: 2007-03-25 20:12:26 -0400 (Sun, 25 Mar 2007) $'); -COMMIT; |