diff options
Diffstat (limited to 'includes/libs')
-rw-r--r-- | includes/libs/XmlTypeCheck.php | 251 |
1 files changed, 161 insertions, 90 deletions
diff --git a/includes/libs/XmlTypeCheck.php b/includes/libs/XmlTypeCheck.php index aca857e9..31a4e28a 100644 --- a/includes/libs/XmlTypeCheck.php +++ b/includes/libs/XmlTypeCheck.php @@ -2,6 +2,11 @@ /** * XML syntax and type checker. * + * Since 1.24.2, it uses XMLReader instead of xml_parse, which gives us + * more control over the expansion of XML entities. When passed to the + * callback, entities will be fully expanded, but may report the XML is + * invalid if expanding the entities are likely to cause a DoS. + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -25,7 +30,7 @@ class XmlTypeCheck { * Will be set to true or false to indicate whether the file is * well-formed XML. Note that this doesn't check schema validity. */ - public $wellFormed = false; + public $wellFormed = null; /** * Will be set to true if the optional element filter returned @@ -78,12 +83,7 @@ class XmlTypeCheck { function __construct( $input, $filterCallback = null, $isFile = true, $options = array() ) { $this->filterCallback = $filterCallback; $this->parserOptions = array_merge( $this->parserOptions, $options ); - - if ( $isFile ) { - $this->validateFromFile( $input ); - } else { - $this->validateFromString( $input ); - } + $this->validateFromInput( $input, $isFile ); } /** @@ -125,140 +125,211 @@ class XmlTypeCheck { return $this->rootElement; } + /** - * Get an XML parser with the root element handler. - * @see XmlTypeCheck::rootElementOpen() - * @return resource a resource handle for the XML parser + * @param string $fname the filename */ - private function getParser() { - $parser = xml_parser_create_ns( 'UTF-8' ); - // case folding violates XML standard, turn it off - xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING, false ); - xml_set_element_handler( $parser, array( $this, 'rootElementOpen' ), false ); - if ( $this->parserOptions['processing_instruction_handler'] ) { - xml_set_processing_instruction_handler( - $parser, - array( $this, 'processingInstructionHandler' ) - ); + private function validateFromInput( $xml, $isFile ) { + $reader = new XMLReader(); + if ( $isFile ) { + $s = $reader->open( $xml, null, LIBXML_NOERROR | LIBXML_NOWARNING ); + } else { + $s = $reader->XML( $xml, null, LIBXML_NOERROR | LIBXML_NOWARNING ); + } + if ( $s !== true ) { + // Couldn't open the XML + $this->wellFormed = false; + } else { + $oldDisable = libxml_disable_entity_loader( true ); + $reader->setParserProperty( XMLReader::SUBST_ENTITIES, true ); + try { + $this->validate( $reader ); + } catch ( Exception $e ) { + // Calling this malformed, because we didn't parse the whole + // thing. Maybe just an external entity refernce. + $this->wellFormed = false; + $reader->close(); + libxml_disable_entity_loader( $oldDisable ); + throw $e; + } + $reader->close(); + libxml_disable_entity_loader( $oldDisable ); } - return $parser; } - /** - * @param string $fname the filename - */ - private function validateFromFile( $fname ) { - $parser = $this->getParser(); - - if ( file_exists( $fname ) ) { - $file = fopen( $fname, "rb" ); - if ( $file ) { - do { - $chunk = fread( $file, 32768 ); - $ret = xml_parse( $parser, $chunk, feof( $file ) ); - if ( $ret == 0 ) { - $this->wellFormed = false; - fclose( $file ); - xml_parser_free( $parser ); - return; + private function readNext( XMLReader $reader ) { + set_error_handler( array( $this, 'XmlErrorHandler' ) ); + $ret = $reader->read(); + restore_error_handler(); + return $ret; + } + + public function XmlErrorHandler( $errno, $errstr ) { + $this->wellFormed = false; + } + + private function validate( $reader ) { + + // First, move through anything that isn't an element, and + // handle any processing instructions with the callback + do { + if( !$this->readNext( $reader ) ) { + // Hit the end of the document before any elements + $this->wellFormed = false; + return; + } + if ( $reader->nodeType === XMLReader::PI ) { + $this->processingInstructionHandler( $reader->name, $reader->value ); + } + } while ( $reader->nodeType != XMLReader::ELEMENT ); + + // Process the rest of the document + do { + switch ( $reader->nodeType ) { + case XMLReader::ELEMENT: + $name = $this->expandNS( + $reader->name, + $reader->namespaceURI + ); + if ( $this->rootElement === '' ) { + $this->rootElement = $name; } - } while ( !feof( $file ) ); + $empty = $reader->isEmptyElement; + $attrs = $this->getAttributesArray( $reader ); + $this->elementOpen( $name, $attrs ); + if ( $empty ) { + $this->elementClose(); + } + break; + + case XMLReader::END_ELEMENT: + $this->elementClose(); + break; + + case XMLReader::WHITESPACE: + case XMLReader::SIGNIFICANT_WHITESPACE: + case XMLReader::CDATA: + case XMLReader::TEXT: + $this->elementData( $reader->value ); + break; - fclose( $file ); + case XMLReader::ENTITY_REF: + // Unexpanded entity (maybe external?), + // don't send to the filter (xml_parse didn't) + break; + + case XMLReader::COMMENT: + // Don't send to the filter (xml_parse didn't) + break; + + case XMLReader::PI: + // Processing instructions can happen after the header too + $this->processingInstructionHandler( + $reader->name, + $reader->value + ); + break; + default: + // One of DOC, DOC_TYPE, ENTITY, END_ENTITY, + // NOTATION, or XML_DECLARATION + // xml_parse didn't send these to the filter, so we won't. } + + } while ( $this->readNext( $reader ) ); + + if ( $this->stackDepth !== 0 ) { + $this->wellFormed = false; + } elseif ( $this->wellFormed === null ) { + $this->wellFormed = true; } - $this->wellFormed = true; - xml_parser_free( $parser ); } /** - * - * @param string $string the XML-input-string to be checked. + * Get all of the attributes for an XMLReader's current node + * @param $r XMLReader + * @return array of attributes */ - private function validateFromString( $string ) { - $parser = $this->getParser(); - $ret = xml_parse( $parser, $string, true ); - xml_parser_free( $parser ); - if ( $ret == 0 ) { - $this->wellFormed = false; - return; + private function getAttributesArray( XMLReader $r ) { + $attrs = array(); + while ( $r->moveToNextAttribute() ) { + if ( $r->namespaceURI === 'http://www.w3.org/2000/xmlns/' ) { + // XMLReader treats xmlns attributes as normal + // attributes, while xml_parse doesn't + continue; + } + $name = $this->expandNS( $r->name, $r->namespaceURI ); + $attrs[$name] = $r->value; } - $this->wellFormed = true; + return $attrs; } /** - * @param $parser - * @param $name - * @param $attribs + * @param $name element or attribute name, maybe with a full or short prefix + * @param $namespaceURI the namespaceURI + * @return string the name prefixed with namespaceURI */ - private function rootElementOpen( $parser, $name, $attribs ) { - $this->rootElement = $name; - - if ( is_callable( $this->filterCallback ) ) { - xml_set_element_handler( - $parser, - array( $this, 'elementOpen' ), - array( $this, 'elementClose' ) - ); - xml_set_character_data_handler( $parser, array( $this, 'elementData' ) ); - $this->elementOpen( $parser, $name, $attribs ); - } else { - // We only need the first open element - xml_set_element_handler( $parser, false, false ); + private function expandNS( $name, $namespaceURI ) { + if ( $namespaceURI ) { + $parts = explode( ':', $name ); + $localname = array_pop( $parts ); + return "$namespaceURI:$localname"; } + return $name; } /** - * @param $parser * @param $name * @param $attribs */ - private function elementOpen( $parser, $name, $attribs ) { + private function elementOpen( $name, $attribs ) { $this->elementDataContext[] = array( $name, $attribs ); $this->elementData[] = ''; $this->stackDepth++; } /** - * @param $parser - * @param $name */ - private function elementClose( $parser, $name ) { + private function elementClose() { list( $name, $attribs ) = array_pop( $this->elementDataContext ); $data = array_pop( $this->elementData ); $this->stackDepth--; - if ( call_user_func( - $this->filterCallback, - $name, - $attribs, - $data - ) ) { - // Filter hit! + if ( is_callable( $this->filterCallback ) + && call_user_func( + $this->filterCallback, + $name, + $attribs, + $data + ) + ) { + // Filter hit $this->filterMatch = true; } } /** - * @param $parser * @param $data */ - private function elementData( $parser, $data ) { - // xml_set_character_data_handler breaks the data on & characters, so - // we collect any data here, and we'll run the callback in elementClose + private function elementData( $data ) { + // Collect any data here, and we'll run the callback in elementClose $this->elementData[ $this->stackDepth - 1 ] .= trim( $data ); } /** - * @param $parser * @param $target * @param $data */ - private function processingInstructionHandler( $parser, $target, $data ) { - if ( call_user_func( $this->parserOptions['processing_instruction_handler'], $target, $data ) ) { - // Filter hit! - $this->filterMatch = true; + private function processingInstructionHandler( $target, $data ) { + if ( $this->parserOptions['processing_instruction_handler'] ) { + if ( call_user_func( + $this->parserOptions['processing_instruction_handler'], + $target, + $data + ) ) { + // Filter hit! + $this->filterMatch = true; + } } } } |