summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--README34
-rw-r--r--classes/Memcached_DataObject.php18
-rw-r--r--db/notice_source.sql2
-rw-r--r--lib/common.php1
-rw-r--r--scripts/fixup_utf8.php141
5 files changed, 196 insertions, 0 deletions
diff --git a/README b/README
index 9207f3e90..db912f201 100644
--- a/README
+++ b/README
@@ -694,6 +694,13 @@ to users on a remote site. (Or not... it's not well tested.) The
Upgrading
=========
+IMPORTANT NOTE: Laconica 0.7.4 introduced a fix for some
+incorrectly-stored international characters ("UTF-8"). New
+installations will now store non-ASCII characters correctly.
+However, older installations still hold data in the incorrect
+storage format, so non-ASCII characters in their existing notices
+will show up "wrong" in browsers. See "UTF-8 Database" below for how
+to deal with this situation.
+
If you've been using Laconica 0.6, 0.5 or lower, or if you've been
tracking the "git" version of the software, you will probably want
to upgrade and keep your existing data. There is no automated upgrade
@@ -783,6 +790,29 @@ problem.
3. When fixup_inboxes is finished, you can set the enabled flag to
'true'.
+UTF-8 Database
+--------------
+
+Laconica 0.7.4 introduced a fix for some incorrectly-stored
+international characters ("UTF-8"). This fix is not
+backwards-compatible; installations from before 0.7.4 will show
+non-ASCII characters of old notices incorrectly. This section explains
+what to do.
+
+0. You can disable the new behaviour by setting $config['db']['utf8']
+ to false. You should only do this until you're ready to
+ convert your DB to the new format.
+1. When you're ready to convert, you can run the fixup_utf8.php script
+ in the scripts/ subdirectory. If you've had the "new behaviour"
+ enabled (probably a good idea), you can give the ID of the first
+ "new" notice as a parameter, and only notices before that one will
+ be converted. Notices are converted in reverse chronological order,
+ so the most recent (and visible) ones will be converted first. The
+ script should work whether or not you have the 'db''utf8' config
+ option enabled.
+2. When you're ready, set $config['db']['utf8'] to true, so that
+ new notices will be stored correctly.
+
Configuration options
=====================
@@ -910,6 +940,10 @@ mirror: you can set this to an array of DSNs, like the above
and adding the slaves to this array. Note that if you want some
requests to go to the 'database' (master) server, you'll need
to include it in this array, too.
+utf8: whether to talk to the database in UTF-8 mode. This is the default
+ with new installations, but older sites may want to turn it off
+ until they get their databases fixed up. See "UTF-8 Database"
+ above for details.
syslog
------
diff --git a/classes/Memcached_DataObject.php b/classes/Memcached_DataObject.php
index 5f71f716b..52ad4100f 100644
--- a/classes/Memcached_DataObject.php
+++ b/classes/Memcached_DataObject.php
@@ -227,4 +227,22 @@ class Memcached_DataObject extends DB_DataObject
$c->set($ckey, $cached, MEMCACHE_COMPRESSED, $expiry);
return new ArrayWrapper($cached);
}
+
+    /**
+     * Connect to the database and, for each newly-created connection,
+     * issue 'SET NAMES "utf8"' when the 'db'/'utf8' config option is on.
+     *
+     * We overload the parent so that the charset is set once per
+     * connection rather than once per data object.
+     *
+     * @return mixed whatever parent::_connect() returned
+     */
+
+    function _connect()
+    {
+        global $_DB_DATAOBJECT;
+
+        // If a connection is already registered under our DSN hash, it
+        // was set up by an earlier call and needs no further work.
+        $exists = !empty($this->_database_dsn_md5) &&
+          isset($_DB_DATAOBJECT['CONNECTIONS'][$this->_database_dsn_md5]);
+
+        $result = parent::_connect();
+
+        if ($exists || !common_config('db', 'utf8')) {
+            return $result;
+        }
+
+        // Check with isset() before taking a reference: referencing a
+        // missing array element would silently create a null entry in
+        // the connection table, and calling query() on it would be fatal.
+        $key = $this->_database_dsn_md5;
+
+        if (empty($key) || !isset($_DB_DATAOBJECT['CONNECTIONS'][$key])) {
+            return $result;
+        }
+
+        $DB = &$_DB_DATAOBJECT['CONNECTIONS'][$key];
+
+        if (is_object($DB) && method_exists($DB, 'query')) {
+            $DB->query('SET NAMES "utf8"');
+        }
+
+        return $result;
+    }
}
diff --git a/db/notice_source.sql b/db/notice_source.sql
index 1508af1ec..d5a280b82 100644
--- a/db/notice_source.sql
+++ b/db/notice_source.sql
@@ -2,6 +2,7 @@ INSERT INTO notice_source
(code, name, url, created)
VALUES
('adium', 'Adium', 'http://www.adiumx.com/', now()),
+ ('AgentSolo.com','AgentSolo.com','http://www.agentsolo.com/', now()),
('betwittered','BeTwittered','http://www.32hours.com/betwitteredinfo/', now()),
('bti','bti','http://gregkh.github.com/bti/', now()),
('cliqset', 'Cliqset', 'http://www.cliqset.com/', now()),
@@ -29,6 +30,7 @@ VALUES
('pingvine','PingVine','http://pingvine.com/', now()),
('pocketwit','PockeTwit','http://code.google.com/p/pocketwit/', now()),
('posty','Posty','http://spreadingfunkyness.com/posty/', now()),
+ ('qtwitter','qTwitter','http://qtwitter.ayoy.net/', now()),
('royalewithcheese','Royale With Cheese','http://p.hellyeah.org/', now()),
('rssdent','rssdent','http://github.com/zcopley/rssdent/tree/master', now()),
('rygh.no','rygh.no','http://rygh.no/', now()),
diff --git a/lib/common.php b/lib/common.php
index 0ce46442d..4a98741e8 100644
--- a/lib/common.php
+++ b/lib/common.php
@@ -174,6 +174,7 @@ $config['db'] =
'require_prefix' => 'classes/',
'class_prefix' => '',
'mirror' => null,
+ 'utf8' => true,
'db_driver' => 'DB', # XXX: JanRain libs only work with DB
'quote_identifiers' => false,
'type' => 'mysql' );
diff --git a/scripts/fixup_utf8.php b/scripts/fixup_utf8.php
new file mode 100644
index 000000000..e5021ff34
--- /dev/null
+++ b/scripts/fixup_utf8.php
@@ -0,0 +1,141 @@
+#!/usr/bin/env php
+<?php
+/*
+ * Laconica - a distributed open-source microblogging tool
+ * Copyright (C) 2009, Control Yourself, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+# Refuse to run when invoked through a web request
+if (isset($_SERVER) && array_key_exists('REQUEST_METHOD', $_SERVER)) {
+    echo "This script must be run from the command line\n";
+    exit(1);
+}
+
+# The conversion may take a long time, so lift PHP's time limits
+set_time_limit(0);
+ini_set('max_execution_time', '0');
+ini_set('max_input_time', '0');
+mb_internal_encoding('UTF-8');
+
+# Constants are defined before lib/common.php is loaded
+define('LACONICA', true);
+define('INSTALLDIR', realpath(dirname(__FILE__) . '/..'));
+
+require_once(INSTALLDIR . '/lib/common.php');
+require_once('DB.php');
+
+/**
+ * Re-store the text of notices that contain multi-byte characters.
+ *
+ * Rows whose content has a different byte length than character length
+ * are read over a connection using 'SET NAMES latin1', and their content
+ * and rendered fields are written back, hex-encoded and passed through
+ * UNHEX(), over a second connection using 'SET NAMES utf8'. Rows are
+ * processed in descending ID order. After each successful update the
+ * Notice object is loaded and decache() is called on it (presumably so
+ * stale cached copies are dropped).
+ *
+ * Progress and errors are echoed to standard output; a failure on one
+ * row does not stop the run.
+ *
+ * @param mixed $id optional notice ID; when non-empty, only notices with
+ *                  a lower ID are converted. Must consist of digits only.
+ *
+ * @return void
+ */
+function fixup_utf8($id) {
+
+    $maxId = null;
+
+    // The ID comes straight from the command line and ends up in SQL,
+    // so accept nothing but digits.
+    if (!empty($id)) {
+        if (!preg_match('/^[0-9]+$/', (string)$id)) {
+            echo "ERROR: notice ID must be a positive integer\n";
+            return;
+        }
+        $maxId = (int)$id;
+    }
+
+    // Connection used for reading the existing data
+
+    $dbl = doConnect('latin1');
+
+    if (empty($dbl)) {
+        return;
+    }
+
+    // Do a separate DB connection for writing the converted data
+
+    $dbu = doConnect('utf8');
+
+    if (empty($dbu)) {
+        $dbl->disconnect();
+        return;
+    }
+
+    $sth = $dbu->prepare("UPDATE notice SET content = UNHEX(?), rendered = UNHEX(?) WHERE id = ?");
+
+    if (PEAR::isError($sth)) {
+        echo "ERROR: " . $sth->getMessage() . "\n";
+        $dbu->disconnect();
+        $dbl->disconnect();
+        return;
+    }
+
+    $sql = 'SELECT id, content, rendered FROM notice ' .
+      'WHERE LENGTH(content) != CHAR_LENGTH(content)';
+
+    if (!is_null($maxId)) {
+        $sql .= ' AND id < ' . $maxId;
+    }
+
+    $sql .= ' ORDER BY id DESC';
+
+    $rn = $dbl->query($sql);
+
+    if (PEAR::isError($rn)) {
+        echo "ERROR: " . $rn->getMessage() . "\n";
+        $dbu->disconnect();
+        $dbl->disconnect();
+        return;
+    }
+
+    echo "Number of rows: " . $rn->numRows() . "\n";
+
+    $row = array();
+
+    while (DB_OK == $rn->fetchInto($row)) {
+
+        // Hex-encode the bytes as read so they pass through the
+        // prepared statement untouched and are restored by UNHEX().
+        $noticeId = ($row[0])+0;
+        $content = bin2hex($row[1]);
+        $rendered = bin2hex($row[2]);
+
+        echo "$noticeId...";
+
+        $result = $dbu->execute($sth, array($content, $rendered, $noticeId));
+
+        if (PEAR::isError($result)) {
+            echo "ERROR: " . $result->getMessage() . "\n";
+            continue;
+        }
+
+        $cnt = $dbu->affectedRows();
+
+        if ($cnt != 1) {
+            echo "ERROR: $cnt rows affected\n";
+            continue;
+        }
+
+        $notice = Notice::staticGet('id', $noticeId);
+
+        if (empty($notice)) {
+            echo "ERROR: updated, but could not load notice to decache\n";
+            continue;
+        }
+
+        $notice->decache();
+
+        echo "OK\n";
+    }
+
+    $dbu->disconnect();
+    $dbl->disconnect();
+}
+
+/**
+ * Open a new, non-persistent connection to the configured database,
+ * issue SET NAMES for the given character set and turn autocommit on.
+ *
+ * Any error is echoed to standard output.
+ *
+ * @param string $charset character set name to pass to SET NAMES
+ *
+ * @return mixed the connection object, or NULL on failure
+ */
+function doConnect($charset)
+{
+    $options = array('persistent' => false);
+    $conn = DB::connect(common_config('db', 'database'), $options);
+
+    if (PEAR::isError($conn)) {
+        echo "ERROR: " . $conn->getMessage() . "\n";
+        return NULL;
+    }
+
+    // Run the two setup steps in order; the second is skipped when
+    // the first one fails.
+    $status = $conn->query("SET NAMES $charset");
+
+    if (!PEAR::isError($status)) {
+        $status = $conn->autoCommit(true);
+    }
+
+    if (PEAR::isError($status)) {
+        echo "ERROR: " . $status->getMessage() . "\n";
+        $conn->disconnect();
+        return NULL;
+    }
+
+    return $conn;
+}
+
+// Optional first argument: only notices with an ID below it are converted
+if ($argc > 1) {
+    $id = $argv[1];
+} else {
+    $id = null;
+}
+
+fixup_utf8($id);