From ea6072fdd71f9b7ce86d13a3d52271f77bd8b02c Mon Sep 17 00:00:00 2001 From: Zach Copley Date: Mon, 1 Mar 2010 14:58:06 -0800 Subject: Upgrade XML output scrubbing to better deal with newline and a few other chars --- lib/util.php | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/lib/util.php b/lib/util.php index 0052090f6..0a6725c7f 100644 --- a/lib/util.php +++ b/lib/util.php @@ -590,8 +590,28 @@ function common_shorten_links($text) function common_xml_safe_str($str) { - // Neutralize control codes and surrogates - return preg_replace('/[\p{Cc}\p{Cs}]/u', '*', $str); + // Replace common eol and extra whitespace input chars + $unWelcome = array( + "\t", // tab + "\n", // newline + "\r", // cr + "\0", // null byte eos + "\x0B" // vertical tab + ); + + $replacement = array( + ' ', // single space + ' ', + '', // nothing + '', + ' ' + ); + + $str = str_replace($unWelcome, $replacement, $str); + + // Neutralize any additional control codes and UTF-16 surrogates + // (Twitter uses '*') + return preg_replace('/[\p{Cc}\p{Cs}]/u', '*', $str); } function common_tag_link($tag) -- cgit v1.2.3