From 1acc7d66c63e0b9f794791e3a2e2b0f60bc2ebb1 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Thu, 30 Sep 2010 11:29:31 -0700 Subject: Always specify UTF-8 target charset for html_entity_decode(); default is 8-bit ISO-8859-1 which causes things to break when we later pass them through things that expect to work with UTF-8. For instance, running through preg_replace() with the /u option results in NULL, leading to problems with OStatus and SubMirror generating their plaintext versions and doing length-cropping. --- scripts/importtwitteratom.php | 2 +- scripts/install_cli.php | 2 +- scripts/restoreuser.php | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'scripts') diff --git a/scripts/importtwitteratom.php b/scripts/importtwitteratom.php index 261dfb1d0..a29526f27 100644 --- a/scripts/importtwitteratom.php +++ b/scripts/importtwitteratom.php @@ -89,7 +89,7 @@ function importActivityStream($user, $doc) $html = htmLawed($html, $config); - $content = html_entity_decode(strip_tags($html)); + $content = html_entity_decode(strip_tags($html), ENT_QUOTES, 'UTF-8'); $notice = Notice::saveNew($user->id, $content, diff --git a/scripts/install_cli.php b/scripts/install_cli.php index 61fbe18ef..dadbcf66f 100755 --- a/scripts/install_cli.php +++ b/scripts/install_cli.php @@ -208,7 +208,7 @@ END_HELP; $breakout = preg_replace('/+]\bhref="(.*)"[^>]*>(.*)<\/a>/', '\2 <\1>', $html); - return html_entity_decode(strip_tags($breakout)); + return html_entity_decode(strip_tags($breakout), ENT_QUOTES, 'UTF-8'); } } diff --git a/scripts/restoreuser.php b/scripts/restoreuser.php index de3816dd5..82eb9bbaa 100644 --- a/scripts/restoreuser.php +++ b/scripts/restoreuser.php @@ -213,7 +213,7 @@ function postNote($user, $activity) // Get (safe!) 
HTML and text versions of the content $rendered = purify($sourceContent); - $content = html_entity_decode(strip_tags($rendered)); + $content = html_entity_decode(strip_tags($rendered), ENT_QUOTES, 'UTF-8'); $shortened = common_shorten_links($content); -- cgit v1.2.3-54-g00ecf