summaryrefslogtreecommitdiff
path: root/lib/util.php
diff options
context:
space:
mode:
authorBrion Vibber <brion@pobox.com>2010-10-06 13:00:30 -0700
committerBrion Vibber <brion@pobox.com>2010-10-06 13:00:30 -0700
commitebfa8bce27d341a3cd10c53d5673b75b60c212bb (patch)
treeb8ce7e2815d3b5b11657983228b2f16bc1508f10 /lib/util.php
parent0aa9f08dd29357d92711c4d101a5d8da5c198f6b (diff)
Basic validation of UTF-8 input via GET/POST vars: a value containing any invalid UTF-8 sequence will be dropped as a whole. Not necessarily super-thorough; should be improved in future to drop individual bad sequences, do normalization of combining forms, etc. General input validation (for ints, types of strings, etc.) would still be good to have!
Diffstat (limited to 'lib/util.php')
-rw-r--r--lib/util.php42
1 files changed, 35 insertions, 7 deletions
diff --git a/lib/util.php b/lib/util.php
index dc853f657..35fcfdb09 100644
--- a/lib/util.php
+++ b/lib/util.php
@@ -906,6 +906,28 @@ function common_shorten_links($text, $always = false)
return common_replace_urls_callback($text, array('File_redirection', 'makeShort'));
}
+/**
+ * Very basic validation of UTF-8 input text: the whole string is rejected if any part of it is invalid.
+ *
+ * @param string $str input text to check
+ * @return mixed the string unchanged, or null if it is not valid UTF-8
+ *
+ * @todo ideally we should drop only the bad chars, and maybe do some of the checks
+ * from common_xml_safe_str. But we can't strip newlines, etc.
+ * @todo Unicode normalization might also be useful, but not needed now.
+ */
+function common_validate_utf8($str)
+{
+ // The empty pattern changes nothing; the /u modifier makes preg_replace return NULL on invalid UTF-8 input.
+ return preg_replace('//u', '', $str);
+}
+
+/**
+ * Make sure an arbitrary string is safe for output in XML as a single line.
+ *
+ * @param string $str
+ * @return string
+ */
function common_xml_safe_str($str)
{
// Replace common eol and extra whitespace input chars
@@ -1663,19 +1685,25 @@ function common_config($main, $sub)
array_key_exists($sub, $config[$main])) ? $config[$main][$sub] : false;
}
+/**
+ * Pull arguments from a GET/POST/REQUEST array with first-level input checks:
+ * strips "magic quotes" slashes if necessary, and kills invalid UTF-8 strings.
+ *
+ * @param array $from
+ * @return array
+ */
function common_copy_args($from)
{
$to = array();
$strip = get_magic_quotes_gpc();
foreach ($from as $k => $v) {
- if($strip) {
- if(is_array($v)) {
- $to[$k] = common_copy_args($v);
- } else {
- $to[$k] = stripslashes($v);
- }
+ if(is_array($v)) {
+ $to[$k] = common_copy_args($v);
} else {
- $to[$k] = $v;
+ if ($strip) {
+ $v = stripslashes($v);
+ }
+ $to[$k] = strval(common_validate_utf8($v));
}
}
return $to;