summary refs log tree commit diff
diff options
context:
space:
mode:
author Zach Copley <zach@status.net> 2010-03-01 14:58:06 -0800
committer Zach Copley <zach@status.net> 2010-03-03 09:52:22 -0800
commit 1fd91de82c0be74ae2305f1412b1cd1b2948dfbc (patch)
tree 84d1b3cad081b98c201737c15fea971bf40c1e13
parent 6cc26a613b7849103d8cfae674bb3a91a7161656 (diff)
Upgrade XML output scrubbing to better deal with newline and a few other chars
-rw-r--r-- lib/util.php 24
1 files changed, 22 insertions, 2 deletions
diff --git a/lib/util.php b/lib/util.php
index 8381bc63c..f793cc10e 100644
--- a/lib/util.php
+++ b/lib/util.php
@@ -770,8 +770,28 @@ function common_shorten_links($text)
function common_xml_safe_str($str)
{
- // Neutralize control codes and surrogates
- return preg_replace('/[\p{Cc}\p{Cs}]/u', '*', $str);
+ // Replace common eol and extra whitespace input chars
+ $unWelcome = array(
+ "\t", // tab
+ "\n", // newline
+ "\r", // cr
+ "\0", // null byte eos
+ "\x0B" // vertical tab
+ );
+
+ $replacement = array(
+ ' ', // single space
+ ' ',
+ '', // nothing
+ '',
+ ' '
+ );
+
+ $str = str_replace($unWelcome, $replacement, $str);
+
+ // Neutralize any additional control codes and UTF-16 surrogates
+ // (Twitter uses '*')
+ return preg_replace('/[\p{Cc}\p{Cs}]/u', '*', $str);
}
function common_tag_link($tag)