From ac75772150c3fe9411408ac44db04e774d095aa0 Mon Sep 17 00:00:00 2001 From: Craig Andrews Date: Mon, 27 Jul 2009 13:42:03 -0400 Subject: Sanitize html returned by oEmbed providers to protect laconica from XSS attacks --- extlib/htmLawed/htmLawedTest.php | 592 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 592 insertions(+) create mode 100644 extlib/htmLawed/htmLawedTest.php (limited to 'extlib/htmLawed/htmLawedTest.php') diff --git a/extlib/htmLawed/htmLawedTest.php b/extlib/htmLawed/htmLawedTest.php new file mode 100644 index 000000000..776828699 --- /dev/null +++ b/extlib/htmLawed/htmLawedTest.php @@ -0,0 +1,592 @@ + $v){ + $_POST[$k] = stripslashes($v); + } + ini_set('magic_quotes_gpc', 0); +} +set_magic_quotes_runtime(0); + +$_POST['enc'] = (isset($_POST['enc']) and preg_match('`^[-\w]+$`', $_POST['enc'])) ? $_POST['enc'] : 'utf-8'; + +// token for anti-CSRF +if(count($_POST)){ + if((empty($_GET['pre']) and ((!empty($_POST['token']) and !empty($_SESSION['token']) and $_POST['token'] != $_SESSION['token']) or empty($_POST[$_sid]) or $_POST[$_sid] != session_id() or empty($_COOKIE[$_sid]) or $_COOKIE[$_sid] != session_id())) or ($_POST[$_sid] != session_id())){ + $_POST = array('enc'=>'utf-8'); + } +} +if(empty($_GET['pre'])){ + $_SESSION['token'] = md5(uniqid(rand(), 1)); + $token = $_SESSION['token']; + session_regenerate_id(1); +} + +// compress +if(function_exists('gzencode') && isset($_SERVER['HTTP_ACCEPT_ENCODING']) && preg_match('`gzip|deflate`i', $_SERVER['HTTP_ACCEPT_ENCODING']) && !ini_get('zlib.output_compression')){ + ob_start('ob_gzhandler'); +} + +// HTM for unprocessed +if(isset($_POST['inputH'])){ + echo 'htmLawed test: HTML view of unprocessed input

  Rendering of unprocessed input without an HTML doctype or charset declaration     close window | htmLawed test page

', $_POST['inputH'], '
'; + exit; +} + +// main +$_POST['text'] = isset($_POST['text']) ? $_POST['text'] : 'text to process; < '. $_limit. ' characters'. ($_hlimit ? ' (for binary hexdump view, < '. $_hlimit. ')' : ''); +$do = (!empty($_POST[$_sid]) && isset($_POST['text'][0]) && !isset($_POST['text'][$_limit])) ? 1 : 0; +$limit_exceeded = isset($_POST['text'][$_limit]) ? 1 : 0; +$pre_mem = memory_get_usage(); +$validation = (!empty($_POST[$_sid]) and isset($_POST['w3c_validate'][0])) ? 1 : 0; +include './htmLawed.php'; + +function format($t){ + $t = "\n". str_replace(array("\t", "\r\n", "\r", '&', '<', '>', "\n"), array(' ', "\n", "\n", '&', '<', '>', "¬
\n"), $t); + return str_replace(array('
', "\n ", ' '), array("\n
\n", "\n ", '  '), $t); +} + +function hexdump($d){ +// Mainly by Aidan Lister , Peter Waller + $hexi = ''; + $ascii = ''; + ob_start(); + echo '
';
+ $offset = 0; // offset of the first byte of the row being built (printed as 4 hex digits)
+ $len = strlen($d);
+ for($i=$j=0; $i<$len; $i++) // $i walks the bytes of $d; $j counts the bytes already in the current row
+ {
+  // Convert to hexadecimal
+  $hexi .= sprintf("%02X ", ord($d[$i]));
+  // Replace non-viewable bytes with '.': only printable ASCII (32-126) is shown as-is, because DEL and lone bytes >= 128 are not displayable and htmlspecialchars() can return '' for them
+  if(ord($d[$i]) >= 32 && ord($d[$i]) < 127){
+   $ascii .= htmlspecialchars($d[$i]);
+  }else{
+   $ascii .= '.';
+  }
+  // Add extra column spacing after the 8th byte of a row
+  if($j == 7){
+   $hexi .= ' ';
+   $ascii .= '  ';
+  }
+  // Add row once 16 bytes have been collected or the input has ended
+  if(++$j == 16 || $i == $len-1){
+   // Join the hexi / ascii output
+   echo sprintf("%04X   %-49s   %s", $offset, $hexi, $ascii);
+   // Reset vars
+   $hexi = $ascii = '';
+   $offset += 16;
+   $j = 0;
+   // Add newline between rows, but not after the last one
+   if ($i !== $len-1){
+    echo "\n";
+   }
+  }
+ }
+ echo '
'; + $o = ob_get_contents(); + ob_end_clean(); + return $o; +} +?> + + + + + + + + +htmLawed (<?php echo hl_version();?>) test + + +
+ +
HTMLAWED TEST
+htm / txt documentation
+ +Input » (max. chars) + +
+ +
+ + +
+ + +'; + } +?> + + + + + + + + + + Validator tools: '; + } +} +?> + +Encoding: + +
+
+ +Input text is too long!
'; +} +?> + +
+ +Settings » + + +
+ +$v){ + if($k[0] == 'h' && $v != 'nil'){ + $cfg[substr($k, 1)] = $v; + } + } + + if($cfg['anti_link_spam'] && (!empty($cfg['anti_link_spam11']) or !empty($cfg['anti_link_spam12']))){ + $cfg['anti_link_spam'] = array($cfg['anti_link_spam11'], $cfg['anti_link_spam12']); + } + unset($cfg['anti_link_spam11'], $cfg['anti_link_spam12']); + if($cfg['anti_mail_spam'] == 1){ + $cfg['anti_mail_spam'] = isset($cfg['anti_mail_spam1'][0]) ? $cfg['anti_mail_spam1'] : 0; + } + unset($cfg['anti_mail_spam11']); + if($cfg['deny_attribute'] == 1){ + $cfg['deny_attribute'] = isset($cfg['deny_attribute1'][0]) ? $cfg['deny_attribute1'] : 0; + } + unset($cfg['deny_attribute1']); + if($cfg['tidy'] == 2){ + $cfg['tidy'] = isset($cfg['tidy2'][0]) ? $cfg['tidy2'] : 0; + } + unset($cfg['tidy2']); + if($cfg['unique_ids'] == 2){ + $cfg['unique_ids'] = isset($cfg['unique_ids2'][0]) ? $cfg['unique_ids2'] : 1; + } + unset($cfg['unique_ids2']); + unset($cfg['and_mark']); // disabling and_mark + + $cfg['show_setting'] = 'hlcfg'; + $st = microtime(); + $out = htmLawed($_POST['text'], $cfg, str_replace(array('$', '{'), '', $_POST['spec'])); + $et = microtime(); + echo '
Input code » ', strlen($_POST['text']), ' chars, ~', round((substr_count($_POST['text'], '>') + substr_count($_POST['text'], '<'))/2), ' tags ', (!isset($_POST['text'][$_hlimit]) ? ' Input binary » ' : ''), ' Finalized internal settings »  ', '
Output » htmLawed processing time ', number_format(((substr($et,0,9)) + (substr($et,-10)) - (substr($st,0,9)) - (substr($st,-10))),4), ' s', (($mem = memory_get_peak_usage()) !== false ? ', peak memory usage '. round(($mem-$pre_mem)/1048576, 2). ' MB' : ''), '
'; + if($_w3c_validate && $validation) + { +?> + + + + +
Output code »
', format($out), '
', (!isset($_POST['text'][$_hlimit]) ? '
Output binary »' : ''), '
Output rendered »
', $out, '
'; +} +else{ +?> + +
+ +
Use with a Javascript- and cookie-enabled, relatively new version of a common browser. Submitted input will also be HTML-rendered (XHTML 1) after htmLawed-filtering. + +
You can use text from this collection of test-cases in the input. Set the character encoding of the browser to Unicode/utf-8 before copying.' : ''); ?> + +

For anti-XSS tests, try the special test-page or see these results. + +

Change Encoding to reflect the character encoding of the input text. Even then, it may not work or some characters may not display properly because of variable browser support and because of the form interface. Developers can write some PHP code to capture the filtered input to a file if this is important. +

Refer to the htmLawed documentation (htm/txt) for details about Settings, and htmLawed's behavior and limitations. For Settings, incorrectly-specified values like regular expressions are silently ignored. One or more settings form-fields may have been disabled. Some characters are not allowed in the Spec field. + + +

Hovering the mouse over some of the text can provide additional information in some browsers.
+ + + +

Because of character-encoding issues, the W3C validator (anyway not perfect) may reject validation requests or invalidate otherwise-valid code, esp. if text was copy-pasted in the input box. Local applications like the HTML Validator Firefox browser add-on may be useful in such cases.
+ + + +
+ + + +
+ + \ No newline at end of file -- cgit v1.2.3-54-g00ecf