diff options
Diffstat (limited to 'scripts/fixup_utf8.php')
-rw-r--r-- | scripts/fixup_utf8.php | 368 |
1 files changed, 368 insertions, 0 deletions
diff --git a/scripts/fixup_utf8.php b/scripts/fixup_utf8.php new file mode 100644 index 000000000..169376091 --- /dev/null +++ b/scripts/fixup_utf8.php @@ -0,0 +1,368 @@ +#!/usr/bin/env php +<?php +/* + * Laconica - a distributed open-source microblogging tool + * Copyright (C) 2009, Control Yourself, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +# Abort if called from a web server +if (isset($_SERVER) && array_key_exists('REQUEST_METHOD', $_SERVER)) { + print "This script must be run from the command line\n"; + exit(1); +} + +ini_set("max_execution_time", "0"); +ini_set("max_input_time", "0"); +set_time_limit(0); +mb_internal_encoding('UTF-8'); + +define('INSTALLDIR', realpath(dirname(__FILE__) . '/..')); +define('LACONICA', true); + +require_once(INSTALLDIR . '/lib/common.php'); +require_once('DB.php'); + +class UTF8FixerUpper +{ + var $dbl = null; + var $dbu = null; + var $args = array(); + + function __construct($args) + { + $this->args = $args; + + if (array_key_exists('max_date', $args)) { + $this->max_date = strftime('%Y-%m-%d %H:%M:%S', strtotime($args['max_date'])); + } else { + $this->max_date = strftime('%Y-%m-%d %H:%M:%S', time()); + } + + $this->dbl = $this->doConnect('latin1'); + + if (empty($this->dbl)) { + return; + } + + $this->dbu = $this->doConnect('utf8'); + + if (empty($this->dbu)) { + return; + } + } + + function doConnect($charset) + { + $db = DB::connect(common_config('db', 'database'), + array('persistent' => false)); + + if (PEAR::isError($db)) { + echo "ERROR: " . $db->getMessage() . "\n"; + return NULL; + } + + $conn = $db->connection; + + $succ = mysqli_set_charset($conn, $charset); + + if (!$succ) { + echo "ERROR: couldn't set charset\n"; + $db->disconnect(); + return NULL; + } + + $result = $db->autoCommit(true); + + if (PEAR::isError($result)) { + echo "ERROR: " . $result->getMessage() . "\n"; + $db->disconnect(); + return NULL; + } + + return $db; + } + + function fixup() + { + $this->fixupNotices($this->args['max_notice'], + $this->args['min_notice']); + $this->fixupProfiles(); + $this->fixupGroups(); + $this->fixupMessages(); + } + + function fixupNotices($max_id, $min_id) { + + // Do a separate DB connection + + $sth = $this->dbu->prepare("UPDATE notice SET content = UNHEX(?), rendered = UNHEX(?) WHERE id = ?"); + + if (PEAR::isError($sth)) { + echo "ERROR: " . $sth->getMessage() . "\n"; + return; + } + + $sql = 'SELECT id, content, rendered FROM notice ' . + 'WHERE LENGTH(content) != CHAR_LENGTH(content) '. + 'AND modified < "'.$this->max_date.'" '; + + if (!empty($max_id)) { + $sql .= ' AND id <= ' . $max_id; + } + + if (!empty($min_id)) { + $sql .= ' AND id >= ' . $min_id; + } + + $sql .= ' ORDER BY id DESC'; + + $rn = $this->dbl->query($sql); + + if (PEAR::isError($rn)) { + echo "ERROR: " . $rn->getMessage() . "\n"; + return; + } + + echo "Number of rows: " . $rn->numRows() . "\n"; + + $notice = array(); + + while (DB_OK == $rn->fetchInto($notice)) { + + $id = ($notice[0])+0; + $content = bin2hex($notice[1]); + $rendered = bin2hex($notice[2]); + + echo "$id..."; + + $result =& $this->dbu->execute($sth, array($content, $rendered, $id)); + + if (PEAR::isError($result)) { + echo "ERROR: " . $result->getMessage() . "\n"; + continue; + } + + $cnt = $this->dbu->affectedRows(); + + if ($cnt != 1) { + echo "ERROR: 0 rows affected\n"; + continue; + } + + $notice = Notice::staticGet('id', $id); + $notice->decache(); + $notice->free(); + + echo "OK\n"; + } + } + + function fixupProfiles() + { + // Do a separate DB connection + + $sth = $this->dbu->prepare("UPDATE profile SET ". + "fullname = UNHEX(?),". + "location = UNHEX(?), ". + "bio = UNHEX(?) ". + "WHERE id = ?"); + + if (PEAR::isError($sth)) { + echo "ERROR: " . $sth->getMessage() . "\n"; + return; + } + + $sql = 'SELECT id, fullname, location, bio FROM profile ' . + 'WHERE (LENGTH(fullname) != CHAR_LENGTH(fullname) '. + 'OR LENGTH(location) != CHAR_LENGTH(location) '. + 'OR LENGTH(bio) != CHAR_LENGTH(bio)) '. + 'AND modified < "'.$this->max_date.'" '. + ' ORDER BY modified DESC'; + + $rn = $this->dbl->query($sql); + + if (PEAR::isError($rn)) { + echo "ERROR: " . $rn->getMessage() . "\n"; + return; + } + + echo "Number of rows: " . $rn->numRows() . "\n"; + + $profile = array(); + + while (DB_OK == $rn->fetchInto($profile)) { + + $id = ($profile[0])+0; + $fullname = bin2hex($profile[1]); + $location = bin2hex($profile[2]); + $bio = bin2hex($profile[3]); + + echo "$id..."; + + $result =& $this->dbu->execute($sth, array($fullname, $location, $bio, $id)); + + if (PEAR::isError($result)) { + echo "ERROR: " . $result->getMessage() . "\n"; + continue; + } + + $cnt = $this->dbu->affectedRows(); + + if ($cnt != 1) { + echo "ERROR: 0 rows affected\n"; + continue; + } + + $profile = Profile::staticGet('id', $id); + $profile->decache(); + $profile->free(); + + echo "OK\n"; + } + } + + function fixupGroups() + { + // Do a separate DB connection + + $sth = $this->dbu->prepare("UPDATE user_group SET ". + "fullname = UNHEX(?),". + "location = UNHEX(?), ". + "description = UNHEX(?) ". + "WHERE id = ?"); + + if (PEAR::isError($sth)) { + echo "ERROR: " . $sth->getMessage() . "\n"; + return; + } + + $sql = 'SELECT id, fullname, location, description FROM user_group ' . + 'WHERE LENGTH(fullname) != CHAR_LENGTH(fullname) '. + 'OR LENGTH(location) != CHAR_LENGTH(location) '. + 'OR LENGTH(description) != CHAR_LENGTH(description) '; + 'AND modified < "'.$this->max_date.'" '. + 'ORDER BY modified DESC'; + + $rn = $this->dbl->query($sql); + + if (PEAR::isError($rn)) { + echo "ERROR: " . $rn->getMessage() . "\n"; + return; + } + + echo "Number of rows: " . $rn->numRows() . "\n"; + + $user_group = array(); + + while (DB_OK == $rn->fetchInto($user_group)) { + + $id = ($user_group[0])+0; + $fullname = bin2hex($user_group[1]); + $location = bin2hex($user_group[2]); + $description = bin2hex($user_group[3]); + + echo "$id..."; + + $result =& $this->dbu->execute($sth, array($fullname, $location, $description, $id)); + + if (PEAR::isError($result)) { + echo "ERROR: " . $result->getMessage() . "\n"; + continue; + } + + $cnt = $this->dbu->affectedRows(); + + if ($cnt != 1) { + echo "ERROR: 0 rows affected\n"; + continue; + } + + $user_group = User_group::staticGet('id', $id); + $user_group->decache(); + $user_group->free(); + + echo "OK\n"; + } + } + + function fixupMessages() { + + // Do a separate DB connection + + $sth = $this->dbu->prepare("UPDATE message SET content = UNHEX(?), rendered = UNHEX(?) WHERE id = ?"); + + if (PEAR::isError($sth)) { + echo "ERROR: " . $sth->getMessage() . "\n"; + return; + } + + $sql = 'SELECT id, content, rendered FROM message ' . + 'WHERE LENGTH(content) != CHAR_LENGTH(content) '. + 'AND modified < "'.$this->max_date.'" '. + 'ORDER BY id DESC'; + + $rn = $this->dbl->query($sql); + + if (PEAR::isError($rn)) { + echo "ERROR: " . $rn->getMessage() . "\n"; + return; + } + + echo "Number of rows: " . $rn->numRows() . "\n"; + + $message = array(); + + while (DB_OK == $rn->fetchInto($message)) { + + $id = ($message[0])+0; + $content = bin2hex($message[1]); + $rendered = bin2hex($message[2]); + + echo "$id..."; + + $result =& $this->dbu->execute($sth, array($content, $rendered, $id)); + + if (PEAR::isError($result)) { + echo "ERROR: " . $result->getMessage() . "\n"; + continue; + } + + $cnt = $this->dbu->affectedRows(); + + if ($cnt != 1) { + echo "ERROR: 0 rows affected\n"; + continue; + } + + $message = Message::staticGet('id', $id); + $message->decache(); + $message->free(); + + echo "OK\n"; + } + } +} + +$max_date = ($argc > 1) ? $argv[1] : null; +$max_id = ($argc > 2) ? $argv[2] : null; +$min_id = ($argc > 3) ? $argv[3] : null; + +$fixer = new UTF8FixerUpper(array('max_date' => $max_date, + 'max_notice' => $max_id, + 'min_notice' => $min_id)); + +$fixer->fixup(); + |