From 81ea0f81173030c73cfc8dd46946d126d3d41622 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Sat, 20 Feb 2010 11:35:01 -0500 Subject: Add HTMLPurifier to extlib HTMLPurifier defangs arbitrary submitted HTML. We're using it in the OStatus plugin, but it may be valuable for other parts of the codebase (I think OEmbed might benefit, for example). --- extlib/HTMLPurifier/HTMLPurifier.php | 237 +++++++++++++++++++++++++++++++++++ 1 file changed, 237 insertions(+) create mode 100644 extlib/HTMLPurifier/HTMLPurifier.php (limited to 'extlib/HTMLPurifier/HTMLPurifier.php') diff --git a/extlib/HTMLPurifier/HTMLPurifier.php b/extlib/HTMLPurifier/HTMLPurifier.php new file mode 100644 index 000000000..e3fce9c2a --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier.php @@ -0,0 +1,237 @@ +config = HTMLPurifier_Config::create($config); + + $this->strategy = new HTMLPurifier_Strategy_Core(); + + } + + /** + * Adds a filter to process the output. First come first serve + * @param $filter HTMLPurifier_Filter object + */ + public function addFilter($filter) { + trigger_error('HTMLPurifier->addFilter() is deprecated, use configuration directives in the Filter namespace or Filter.Custom', E_USER_WARNING); + $this->filters[] = $filter; + } + + /** + * Filters an HTML snippet/document to be XSS-free and standards-compliant. + * + * @param $html String of HTML to purify + * @param $config HTMLPurifier_Config object for this operation, if omitted, + * defaults to the config object specified during this + * object's construction. The parameter can also be any type + * that HTMLPurifier_Config::create() supports. + * @return Purified HTML + */ + public function purify($html, $config = null) { + + // :TODO: make the config merge in, instead of replace + $config = $config ? HTMLPurifier_Config::create($config) : $this->config; + + // implementation is partially environment dependant, partially + // configuration dependant + $lexer = HTMLPurifier_Lexer::create($config); + + $context = new HTMLPurifier_Context(); + + // setup HTML generator + $this->generator = new HTMLPurifier_Generator($config, $context); + $context->register('Generator', $this->generator); + + // set up global context variables + if ($config->get('Core.CollectErrors')) { + // may get moved out if other facilities use it + $language_factory = HTMLPurifier_LanguageFactory::instance(); + $language = $language_factory->create($config, $context); + $context->register('Locale', $language); + + $error_collector = new HTMLPurifier_ErrorCollector($context); + $context->register('ErrorCollector', $error_collector); + } + + // setup id_accumulator context, necessary due to the fact that + // AttrValidator can be called from many places + $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context); + $context->register('IDAccumulator', $id_accumulator); + + $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context); + + // setup filters + $filter_flags = $config->getBatch('Filter'); + $custom_filters = $filter_flags['Custom']; + unset($filter_flags['Custom']); + $filters = array(); + foreach ($filter_flags as $filter => $flag) { + if (!$flag) continue; + if (strpos($filter, '.') !== false) continue; + $class = "HTMLPurifier_Filter_$filter"; + $filters[] = new $class; + } + foreach ($custom_filters as $filter) { + // maybe "HTMLPurifier_Filter_$filter", but be consistent with AutoFormat + $filters[] = $filter; + } + $filters = array_merge($filters, $this->filters); + // maybe prepare(), but later + + for ($i = 0, $filter_size = count($filters); $i < $filter_size; $i++) { + $html = $filters[$i]->preFilter($html, $config, $context); + } + + // purified HTML + $html = + $this->generator->generateFromTokens( + // list of tokens + $this->strategy->execute( + // list of un-purified tokens + $lexer->tokenizeHTML( + // un-purified HTML + $html, $config, $context + ), + $config, $context + ) + ); + + for ($i = $filter_size - 1; $i >= 0; $i--) { + $html = $filters[$i]->postFilter($html, $config, $context); + } + + $html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context); + $this->context =& $context; + return $html; + } + + /** + * Filters an array of HTML snippets + * @param $config Optional HTMLPurifier_Config object for this operation. + * See HTMLPurifier::purify() for more details. + * @return Array of purified HTML + */ + public function purifyArray($array_of_html, $config = null) { + $context_array = array(); + foreach ($array_of_html as $key => $html) { + $array_of_html[$key] = $this->purify($html, $config); + $context_array[$key] = $this->context; + } + $this->context = $context_array; + return $array_of_html; + } + + /** + * Singleton for enforcing just one HTML Purifier in your system + * @param $prototype Optional prototype HTMLPurifier instance to + * overload singleton with, or HTMLPurifier_Config + * instance to configure the generated version with. + */ + public static function instance($prototype = null) { + if (!self::$instance || $prototype) { + if ($prototype instanceof HTMLPurifier) { + self::$instance = $prototype; + } elseif ($prototype) { + self::$instance = new HTMLPurifier($prototype); + } else { + self::$instance = new HTMLPurifier(); + } + } + return self::$instance; + } + + /** + * @note Backwards compatibility, see instance() + */ + public static function getInstance($prototype = null) { + return HTMLPurifier::instance($prototype); + } + +} + +// vim: et sw=4 sts=4 -- cgit v1.2.3-54-g00ecf