summaryrefslogtreecommitdiff
path: root/scripts/fixup_utf8.php
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/fixup_utf8.php')
-rw-r--r--scripts/fixup_utf8.php368
1 files changed, 368 insertions, 0 deletions
diff --git a/scripts/fixup_utf8.php b/scripts/fixup_utf8.php
new file mode 100644
index 000000000..169376091
--- /dev/null
+++ b/scripts/fixup_utf8.php
@@ -0,0 +1,368 @@
+#!/usr/bin/env php
+<?php
+/*
+ * Laconica - a distributed open-source microblogging tool
+ * Copyright (C) 2009, Control Yourself, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+# This is a maintenance script: refuse to run when a web server invoked us
+# (a REQUEST_METHOD entry in $_SERVER is the tell-tale).
+if (isset($_SERVER) && array_key_exists('REQUEST_METHOD', $_SERVER)) {
+    echo "This script must be run from the command line\n";
+    exit(1);
+}
+
+# Lift PHP's execution/input time limits and make the mb_* functions
+# default to UTF-8.
+set_time_limit(0);
+ini_set('max_input_time', '0');
+ini_set('max_execution_time', '0');
+mb_internal_encoding('UTF-8');
+
+# Constants defined before lib/common.php is pulled in below.
+define('LACONICA', true);
+define('INSTALLDIR', realpath(dirname(__FILE__) . '/..'));
+
+require_once(INSTALLDIR . '/lib/common.php');
+require_once('DB.php');
+
+/**
+ * Rewrites text columns of the notice, profile, user_group and message
+ * tables through two differently-configured database connections.
+ *
+ * Rows are SELECTed over a connection whose charset is set to latin1,
+ * each text value is hex-encoded in PHP, and the value is written back
+ * with UNHEX(?) over a second connection whose charset is set to utf8.
+ * NOTE(review): presumably this repairs text that was stored through a
+ * latin1 connection into utf8 columns -- confirm against the schema
+ * history before running it on data that is already correct.
+ */
+class UTF8FixerUpper
+{
+    // PEAR DB connection with charset latin1 (used for reading), or null
+    var $dbl = null;
+    // PEAR DB connection with charset utf8 (used for writing), or null
+    var $dbu = null;
+    // Raw argument array handed to the constructor
+    var $args = array();
+    // 'Y-m-d H:i:s' cutoff; only rows with modified < max_date are selected
+    var $max_date = null;
+
+    /**
+     * Stores the arguments, computes the cutoff date and opens both
+     * database connections.
+     *
+     * @param array $args recognised keys: 'max_date' (any string accepted
+     *                    by strtotime()), 'max_notice', 'min_notice'
+     *
+     * On connection failure the error has already been printed by
+     * doConnect() and the corresponding property stays null.
+     */
+    function __construct($args)
+    {
+        $this->args = $args;
+
+        // The caller may pass the 'max_date' key with a null value, so test
+        // for emptiness rather than key existence; otherwise strtotime(null)
+        // fails and the cutoff silently becomes the epoch.
+        $stamp = time();
+
+        if (!empty($args['max_date'])) {
+            $stamp = strtotime($args['max_date']);
+            if ($stamp === false) {
+                // Fall back to the epoch so that no row is newer than the
+                // cutoff; this is what the unparseable value produced before,
+                // but now it is reported.
+                echo "ERROR: couldn't parse max_date '" . $args['max_date'] .
+                     "'; using the epoch as cutoff\n";
+                $stamp = 0;
+            }
+        }
+
+        $this->max_date = date('Y-m-d H:i:s', $stamp);
+
+        $this->dbl = $this->doConnect('latin1');
+
+        if (empty($this->dbl)) {
+            return;
+        }
+
+        $this->dbu = $this->doConnect('utf8');
+
+        if (empty($this->dbu)) {
+            return;
+        }
+    }
+
+    /**
+     * Opens a non-persistent connection to the configured database, sets
+     * its charset and enables autocommit.
+     *
+     * @param string $charset charset name passed to mysqli_set_charset()
+     *
+     * @return object|null the PEAR DB connection, or null after printing
+     *                     an error message
+     */
+    function doConnect($charset)
+    {
+        $db = DB::connect(common_config('db', 'database'),
+                          array('persistent' => false));
+
+        if (PEAR::isError($db)) {
+            echo "ERROR: " . $db->getMessage() . "\n";
+            return NULL;
+        }
+
+        // NOTE(review): this hands the driver's raw handle to a mysqli
+        // function, so it assumes the DSN selects the mysqli driver.
+        $conn = $db->connection;
+
+        $succ = mysqli_set_charset($conn, $charset);
+
+        if (!$succ) {
+            echo "ERROR: couldn't set charset\n";
+            $db->disconnect();
+            return NULL;
+        }
+
+        $result = $db->autoCommit(true);
+
+        if (PEAR::isError($result)) {
+            echo "ERROR: " . $result->getMessage() . "\n";
+            $db->disconnect();
+            return NULL;
+        }
+
+        return $db;
+    }
+
+    /**
+     * Runs every fixup in turn: notices, profiles, groups, messages.
+     *
+     * Does nothing (beyond printing an error) when either database
+     * connection could not be opened by the constructor.
+     */
+    function fixup()
+    {
+        if (empty($this->dbl) || empty($this->dbu)) {
+            echo "ERROR: no database connection; nothing to fix up\n";
+            return;
+        }
+
+        $max_id = isset($this->args['max_notice']) ? $this->args['max_notice'] : null;
+        $min_id = isset($this->args['min_notice']) ? $this->args['min_notice'] : null;
+
+        $this->fixupNotices($max_id, $min_id);
+        $this->fixupProfiles();
+        $this->fixupGroups();
+        $this->fixupMessages();
+    }
+
+    /**
+     * Fixes notice.content and notice.rendered for notices whose content
+     * has differing byte and character lengths and which were modified
+     * before the cutoff, newest id first.
+     *
+     * @param mixed $max_id optional inclusive upper bound on notice id
+     * @param mixed $min_id optional inclusive lower bound on notice id
+     */
+    function fixupNotices($max_id, $min_id)
+    {
+        $select = 'SELECT id, content, rendered FROM notice ' .
+                  'WHERE LENGTH(content) != CHAR_LENGTH(content) ' .
+                  'AND modified < "' . $this->max_date . '" ';
+
+        // The bounds come from the command line and are concatenated into
+        // the query, so accept plain digit strings only.
+        foreach (array('<=' => $max_id, '>=' => $min_id) as $op => $bound) {
+            if (empty($bound)) {
+                continue;
+            }
+            if (!preg_match('/^[0-9]+$/', $bound)) {
+                echo "ERROR: notice id bound '$bound' is not a number\n";
+                return;
+            }
+            $select .= ' AND id ' . $op . ' ' . $bound;
+        }
+
+        $select .= ' ORDER BY id DESC';
+
+        $this->fixupRows("UPDATE notice SET content = UNHEX(?), rendered = UNHEX(?) WHERE id = ?",
+                         $select,
+                         'Notice');
+    }
+
+    /**
+     * Fixes fullname, location and bio of profiles where any of the three
+     * has differing byte and character lengths and which were modified
+     * before the cutoff, most recently modified first.
+     */
+    function fixupProfiles()
+    {
+        $select = 'SELECT id, fullname, location, bio FROM profile ' .
+                  'WHERE (LENGTH(fullname) != CHAR_LENGTH(fullname) ' .
+                  'OR LENGTH(location) != CHAR_LENGTH(location) ' .
+                  'OR LENGTH(bio) != CHAR_LENGTH(bio)) ' .
+                  'AND modified < "' . $this->max_date . '" ' .
+                  ' ORDER BY modified DESC';
+
+        $this->fixupRows("UPDATE profile SET " .
+                         "fullname = UNHEX(?)," .
+                         "location = UNHEX(?), " .
+                         "bio = UNHEX(?) " .
+                         "WHERE id = ?",
+                         $select,
+                         'Profile');
+    }
+
+    /**
+     * Fixes fullname, location and description of groups where any of the
+     * three has differing byte and character lengths and which were
+     * modified before the cutoff, most recently modified first.
+     */
+    function fixupGroups()
+    {
+        // The OR chain is parenthesised so the date cutoff applies to all
+        // three alternatives, and the string is one expression so the
+        // cutoff and ordering really are part of the query.
+        $select = 'SELECT id, fullname, location, description FROM user_group ' .
+                  'WHERE (LENGTH(fullname) != CHAR_LENGTH(fullname) ' .
+                  'OR LENGTH(location) != CHAR_LENGTH(location) ' .
+                  'OR LENGTH(description) != CHAR_LENGTH(description)) ' .
+                  'AND modified < "' . $this->max_date . '" ' .
+                  'ORDER BY modified DESC';
+
+        $this->fixupRows("UPDATE user_group SET " .
+                         "fullname = UNHEX(?)," .
+                         "location = UNHEX(?), " .
+                         "description = UNHEX(?) " .
+                         "WHERE id = ?",
+                         $select,
+                         'User_group');
+    }
+
+    /**
+     * Fixes message.content and message.rendered for messages whose
+     * content has differing byte and character lengths and which were
+     * modified before the cutoff, newest id first.
+     */
+    function fixupMessages()
+    {
+        $select = 'SELECT id, content, rendered FROM message ' .
+                  'WHERE LENGTH(content) != CHAR_LENGTH(content) ' .
+                  'AND modified < "' . $this->max_date . '" ' .
+                  'ORDER BY id DESC';
+
+        $this->fixupRows("UPDATE message SET content = UNHEX(?), rendered = UNHEX(?) WHERE id = ?",
+                         $select,
+                         'Message');
+    }
+
+    /**
+     * Shared worker: reads rows over the latin1 connection and writes each
+     * one back over the utf8 connection, then drops the cached object.
+     *
+     * @param string $update UPDATE statement with one UNHEX(?) placeholder
+     *                       per text column followed by "WHERE id = ?"
+     * @param string $select SELECT whose first column is the id and whose
+     *                       remaining columns match the placeholders in order
+     * @param string $class  name of the class whose staticGet('id', $id)
+     *                       result is decache()d and free()d after an update
+     *
+     * Progress ("<id>...OK" or an error) is printed for every row.
+     */
+    private function fixupRows($update, $select, $class)
+    {
+        $sth = $this->dbu->prepare($update);
+
+        if (PEAR::isError($sth)) {
+            echo "ERROR: " . $sth->getMessage() . "\n";
+            return;
+        }
+
+        $rn = $this->dbl->query($select);
+
+        if (PEAR::isError($rn)) {
+            echo "ERROR: " . $rn->getMessage() . "\n";
+            return;
+        }
+
+        echo "Number of rows: " . $rn->numRows() . "\n";
+
+        $row = array();
+
+        while (DB_OK == $rn->fetchInto($row)) {
+
+            $id = ($row[0])+0;
+
+            // Hex-encode every text column; keep NULL as NULL so that
+            // UNHEX(NULL) leaves the column NULL instead of turning it
+            // into an empty string.
+            $params = array();
+            foreach (array_slice($row, 1) as $value) {
+                $params[] = is_null($value) ? null : bin2hex($value);
+            }
+            $params[] = $id;
+
+            echo "$id...";
+
+            $result = $this->dbu->execute($sth, $params);
+
+            if (PEAR::isError($result)) {
+                echo "ERROR: " . $result->getMessage() . "\n";
+                continue;
+            }
+
+            $cnt = $this->dbu->affectedRows();
+
+            if ($cnt != 1) {
+                echo "ERROR: 0 rows affected\n";
+                continue;
+            }
+
+            // Kept in its own variable so the fetch buffer is not clobbered;
+            // guarded because the lookup may come back empty.
+            $object = call_user_func(array($class, 'staticGet'), 'id', $id);
+
+            if (!empty($object)) {
+                $object->decache();
+                $object->free();
+            }
+
+            echo "OK\n";
+        }
+    }
+}
+
+// Optional positional arguments, in order: cutoff date, highest notice id,
+// lowest notice id. Anything not supplied is passed on as null.
+$max_date = null;
+$max_id   = null;
+$min_id   = null;
+
+if ($argc > 1) {
+    $max_date = $argv[1];
+}
+
+if ($argc > 2) {
+    $max_id = $argv[2];
+}
+
+if ($argc > 3) {
+    $min_id = $argv[3];
+}
+
+$fixer_args = array();
+$fixer_args['max_date']   = $max_date;
+$fixer_args['max_notice'] = $max_id;
+$fixer_args['min_notice'] = $min_id;
+
+$fixer = new UTF8FixerUpper($fixer_args);
+
+$fixer->fixup();
+