diff options
Diffstat (limited to 'maintenance/postgres/compare_schemas.pl')
-rw-r--r-- | maintenance/postgres/compare_schemas.pl | 221 |
1 files changed, 220 insertions, 1 deletions
diff --git a/maintenance/postgres/compare_schemas.pl b/maintenance/postgres/compare_schemas.pl index 297e3af2..6f639eca 100644 --- a/maintenance/postgres/compare_schemas.pl +++ b/maintenance/postgres/compare_schemas.pl @@ -112,6 +112,8 @@ sub parse_sql { } elsif (/^ (\w+) $datatype$typeval$typeval2{0,3},?$/) { $info{$table}{column}{$1} = $2; + my $extra = $3 || ''; + $info{$table}{columnfull}{$1} = "$2$extra"; } elsif (/^ ($indextype)(?: (\w+))? \(([\w, \(\)]+)\),?$/) { $info{$table}{lc $1.'_name'} = $2 ? $2 : ''; @@ -128,6 +130,46 @@ sub parse_sql { } ## end of parse_sql +## Read in the parser test information +my $parsefile = '../parserTests.inc'; +open my $pfh, '<', $parsefile or die qq{Could not open "$parsefile": $!\n}; +my $stat = 0; +my %ptable; +while (<$pfh>) { + if (!$stat) { + if (/function listTables/) { + $stat = 1; + } + next; + } + $ptable{$1}=2 while /'(\w+)'/g; + last if /\);/; +} +close $pfh; + +my $OK_NOT_IN_PTABLE = ' +filearchive +logging +profiling +querycache_info +searchindex +trackbacks +transcache +user_newtalk +'; + +## Make sure all tables in main tables.sql are accounted for int the parsertest. +for my $table (sort keys %{$old{'../tables.sql'}}) { + $ptable{$table}++; + next if $ptable{$table} > 2; + next if $OK_NOT_IN_PTABLE =~ /\b$table\b/; + print qq{Table "$table" is in the schema, but not used inside of parserTest.inc\n}; +} +## Any that are used in ptables but no longer exist in the schema? +for my $table (sort grep { $ptable{$_} == 2 } keys %ptable) { + print qq{Table "$table" ($ptable{$table}) used in parserTest.inc, but not found in schema\n}; +} + for my $oldfile (@old) { ## Begin non-standard indent @@ -209,6 +251,171 @@ while (<$newfh>) { } } +## Which column types are okay to map from mysql to postgres? +my $COLMAP = q{ +## INTS: +tinyint SMALLINT +int INTEGER SERIAL +bigint BIGINT +real NUMERIC +float NUMERIC + +## TEXT: +varchar(32) TEXT +varchar(70) TEXT +varchar(255) TEXT +varchar TEXT +text TEXT +tinytext TEXT +ENUM TEXT + +## TIMESTAMPS: +varbinary(14) TIMESTAMPTZ +binary(14) TIMESTAMPTZ +datetime TIMESTAMPTZ +timestamp TIMESTAMPTZ + +## BYTEA: +mediumblob BYTEA + +## OTHER: +bool CHAR # Sigh + +}; +## Allow specific exceptions to the above +my $COLMAPOK = q{ +## User inputted text strings: +ar_comment tinyblob TEXT +fa_description tinyblob TEXT +img_description tinyblob TEXT +ipb_reason tinyblob TEXT +log_action varbinary(10) TEXT +oi_description tinyblob TEXT +rev_comment tinyblob TEXT +rc_log_action varbinary(255) TEXT +rc_log_type varbinary(255) TEXT + +## Simple text-only strings: +ar_flags tinyblob TEXT +fa_minor_mime varbinary(32) TEXT +fa_storage_group varbinary(16) TEXT # Just 'deleted' for now, should stay plain text +fa_storage_key varbinary(64) TEXT # sha1 plus text extension +ipb_address tinyblob TEXT # IP address or username +ipb_range_end tinyblob TEXT # hexadecimal +ipb_range_start tinyblob TEXT # hexadecimal +img_minor_mime varbinary(32) TEXT +img_sha1 varbinary(32) TEXT +job_cmd varbinary(60) TEXT # Should we limit to 60 as well? +keyname varbinary(255) TEXT # No tablename prefix (objectcache) +ll_lang varbinary(20) TEXT # Language code +log_params blob TEXT # LF separated list of args +log_type varbinary(10) TEXT +oi_minor_mime varbinary(32) TEXT +oi_sha1 varbinary(32) TEXT +old_flags tinyblob TEXT +old_text mediumblob TEXT +page_restrictions tinyblob TEXT # CSV string +pf_server varchar(30) TEXT +pr_level varbinary(60) TEXT +pr_type varbinary(60) TEXT +pt_create_perm varbinary(60) TEXT +pt_reason tinyblob TEXT +qc_type varbinary(32) TEXT +qcc_type varbinary(32) TEXT +qci_type varbinary(32) TEXT +rc_params blob TEXT +ug_group varbinary(16) TEXT +user_email_token binary(32) TEXT +user_ip varbinary(40) TEXT +user_newpassword tinyblob TEXT +user_options blob TEXT +user_password tinyblob TEXT +user_token binary(32) TEXT + +## Text URLs: +el_index blob TEXT +el_to blob TEXT +iw_url blob TEXT +tb_url blob TEXT +tc_url varbinary(255) TEXT + +## Deprecated or not yet used: +ar_text mediumblob TEXT +job_params blob TEXT +log_deleted tinyint INTEGER # Not used yet, but keep it INTEGER for safety +rc_type tinyint CHAR + +## Number tweaking: +fa_bits int SMALLINT # bits per pixel +fa_height int SMALLINT +fa_width int SMALLINT # Hope we don't see an image this wide... +hc_id int BIGINT # Odd that site_stats is all bigint... +img_bits int SMALLINT # bits per image should stay sane +oi_bits int SMALLINT + +## True binary fields, usually due to gzdeflate and/or serialize: +math_inputhash varbinary(16) BYTEA +math_outputhash varbinary(16) BYTEA + +## Namespaces: not need for such a high range +ar_namespace int SMALLINT +job_namespace int SMALLINT +log_namespace int SMALLINT +page_namespace int SMALLINT +pl_namespace int SMALLINT +pt_namespace int SMALLINT +qc_namespace int SMALLINT +rc_namespace int SMALLINT +rd_namespace int SMALLINT +tl_namespace int SMALLINT +wl_namespace int SMALLINT + +## "Bools" +ar_minor_edit tinyint CHAR +iw_trans tinyint CHAR +page_is_new tinyint CHAR +page_is_redirect tinyint CHAR +rc_bot tinyint CHAR +rc_deleted tinyint CHAR +rc_minor tinyint CHAR +rc_new tinyint CHAR +rc_patrolled tinyint CHAR +rev_deleted tinyint CHAR +rev_minor_edit tinyint CHAR + +## Easy enough to change if a wiki ever does grow this big: +ss_good_articles bigint INTEGER +ss_total_edits bigint INTEGER +ss_total_pages bigint INTEGER +ss_total_views bigint INTEGER +ss_users bigint INTEGER + +## True IP - keep an eye on these, coders tend to make textual assumptions +rc_ip varbinary(40) CIDR # Want to keep an eye on this + +## Others: +tc_time int TIMESTAMPTZ + + +}; + +my %colmap; +for (split /\n/ => $COLMAP) { + next unless /^\w/; + s/(.*?)#.*/$1/; + my ($col,@maps) = split / +/, $_; + for (@maps) { + $colmap{$col}{$_} = 1; + } +} + +my %colmapok; +for (split /\n/ => $COLMAPOK) { + next unless /^\w/; + my ($col,$old,$new) = split / +/, $_; + $colmapok{$col}{$old}{$new} = 1; +} + ## Old but not new for my $t (sort keys %{$old{$oldfile}}) { if (!exists $new{$t} and !exists $ok{OLD}{$t}) { @@ -218,6 +425,7 @@ for my $t (sort keys %{$old{$oldfile}}) { next if exists $ok{OLD}{$t} and !$ok{OLD}{$t}; my $newt = exists $ok{OLD}{$t} ? $ok{OLD}{$t} : $t; my $oldcol = $old{$oldfile}{$t}{column}; + my $oldcolfull = $old{$oldfile}{$t}{columnfull}; my $newcol = $new{$newt}{column}; for my $c (keys %$oldcol) { if (!exists $newcol->{$c}) { @@ -225,11 +433,22 @@ for my $t (sort keys %{$old{$oldfile}}) { next; } } - for my $c (keys %$newcol) { + for my $c (sort keys %$newcol) { if (!exists $oldcol->{$c}) { print "Column $t.$c not in $oldfile\n"; next; } + ## Column types (roughly) match up? + my $new = $newcol->{$c}; + my $old = $oldcolfull->{$c}; + + ## Known exceptions: + next if exists $colmapok{$c}{$old}{$new}; + + $old =~ s/ENUM.*/ENUM/; + if (! exists $colmap{$old}{$new}) { + print "Column types for $t.$c do not match: $old does not map to $new\n"; + } } } ## New but not old: |