diff options
Diffstat (limited to 'includes/media')
-rw-r--r-- | includes/media/BitmapMetadataHandler.php | 6 | ||||
-rw-r--r-- | includes/media/JpegMetadataExtractor.php | 2 | ||||
-rw-r--r-- | includes/media/XMP.php | 96 |
3 files changed, 100 insertions, 4 deletions
diff --git a/includes/media/BitmapMetadataHandler.php b/includes/media/BitmapMetadataHandler.php index dd41c388..1d790155 100644 --- a/includes/media/BitmapMetadataHandler.php +++ b/includes/media/BitmapMetadataHandler.php @@ -154,7 +154,7 @@ class BitmapMetadataHandler { * @throws MWException On invalid file. */ static function Jpeg( $filename ) { - $showXMP = function_exists( 'xml_parser_create_ns' ); + $showXMP = XMPReader::isSupported(); $meta = new self(); $seg = JpegMetadataExtractor::segmentSplitter( $filename ); @@ -196,7 +196,7 @@ class BitmapMetadataHandler { * @return array Array for storage in img_metadata. */ public static function PNG( $filename ) { - $showXMP = function_exists( 'xml_parser_create_ns' ); + $showXMP = XMPReader::isSupported(); $meta = new self(); $array = PNGMetadataExtractor::getMetadata( $filename ); @@ -236,7 +236,7 @@ class BitmapMetadataHandler { $meta->addMetadata( array( 'GIFFileComment' => $baseArray['comment'] ), 'native' ); } - if ( $baseArray['xmp'] !== '' && function_exists( 'xml_parser_create_ns' ) ) { + if ( $baseArray['xmp'] !== '' && XMPReader::isSupported() ) { $xmp = new XMPReader(); $xmp->parse( $baseArray['xmp'] ); $xmpRes = $xmp->getResults(); diff --git a/includes/media/JpegMetadataExtractor.php b/includes/media/JpegMetadataExtractor.php index 8c5b46bb..aaa9930a 100644 --- a/includes/media/JpegMetadataExtractor.php +++ b/includes/media/JpegMetadataExtractor.php @@ -48,7 +48,7 @@ class JpegMetadataExtractor { * @throws MWException If given invalid file. */ static function segmentSplitter( $filename ) { - $showXMP = function_exists( 'xml_parser_create_ns' ); + $showXMP = XMPReader::isSupported(); $segmentCount = 0; diff --git a/includes/media/XMP.php b/includes/media/XMP.php index cdbd5ab2..a3f45e6c 100644 --- a/includes/media/XMP.php +++ b/includes/media/XMP.php @@ -80,6 +80,12 @@ class XMPReader { /** @var int */ private $extendedXMPOffset = 0; + /** @var int Flag determining if the XMP is safe to parse **/ + private $parsable = 0; + + /** @var string Buffer of XML to parse **/ + private $xmlParsableBuffer = ''; + /** * These are various mode constants. * they are used to figure out what to do @@ -108,6 +114,12 @@ class XMPReader { const NS_RDF = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'; const NS_XML = 'http://www.w3.org/XML/1998/namespace'; + // States used while determining if XML is safe to parse + const PARSABLE_UNKNOWN = 0; + const PARSABLE_OK = 1; + const PARSABLE_BUFFERING = 2; + const PARSABLE_NO = 3; + /** * Constructor. * @@ -145,6 +157,9 @@ class XMPReader { array( $this, 'endElement' ) ); xml_set_character_data_handler( $this->xmlParser, array( $this, 'char' ) ); + + $this->parsable = self::PARSABLE_UNKNOWN; + $this->xmlParsableBuffer = ''; } /** Destroy the xml parser @@ -156,6 +171,13 @@ class XMPReader { xml_parser_free( $this->xmlParser ); } + /** + * Check if this instance supports using this class + */ + public static function isSupported() { + return function_exists( 'xml_parser_create_ns' ) && class_exists( 'XMLReader' ); + } + /** Get the result array. Do some post-processing before returning * the array, and transform any metadata that is special-cased. * @@ -305,6 +327,27 @@ class XMPReader { wfRestoreWarnings(); } + // Ensure the XMP block does not have an xml doctype declaration, which + // could declare entities unsafe to parse with xml_parse (T85848/T71210). + if ( $this->parsable !== self::PARSABLE_OK ) { + if ( $this->parsable === self::PARSABLE_NO ) { + throw new MWException( 'Unsafe doctype declaration in XML.' ); + } + + $content = $this->xmlParsableBuffer . $content; + if ( !$this->checkParseSafety( $content ) ) { + if ( !$allOfIt && $this->parsable !== self::PARSABLE_NO ) { + // parse wasn't Unsuccessful yet, so return true + // in this case. + return true; + } + $msg = ( $this->parsable === self::PARSABLE_NO ) ? + 'Unsafe doctype declaration in XML.' : + 'No root element found in XML.'; + throw new MWException( $msg ); + } + } + $ok = xml_parse( $this->xmlParser, $content, $allOfIt ); if ( !$ok ) { $error = xml_error_string( xml_get_error_code( $this->xmlParser ) ); @@ -437,6 +480,59 @@ class XMPReader { } } + /** + * Check if a block of XML is safe to pass to xml_parse, i.e. doesn't + * contain a doctype declaration which could contain a dos attack if we + * parse it and expand internal entities (T85848). + * + * @param string $content xml string to check for parse safety + * @return bool true if the xml is safe to parse, false otherwise + */ + private function checkParseSafety( $content ) { + $reader = new XMLReader(); + $result = null; + + // For XMLReader to parse incomplete/invalid XML, it has to be open()'ed + // instead of using XML(). + $reader->open( + 'data://text/plain,' . urlencode( $content ), + null, + LIBXML_NOERROR | LIBXML_NOWARNING | LIBXML_NONET + ); + + $oldDisable = libxml_disable_entity_loader( true ); + $reader->setParserProperty( XMLReader::SUBST_ENTITIES, false ); + + // Even with LIBXML_NOWARNING set, XMLReader::read gives a warning + // when parsing truncated XML, which causes unit tests to fail. + wfSuppressWarnings(); + while ( $reader->read() ) { + if ( $reader->nodeType === XMLReader::ELEMENT ) { + // Reached the first element without hitting a doctype declaration + $this->parsable = self::PARSABLE_OK; + $result = true; + break; + } + if ( $reader->nodeType === XMLReader::DOC_TYPE ) { + $this->parsable = self::PARSABLE_NO; + $result = false; + break; + } + } + wfRestoreWarnings(); + libxml_disable_entity_loader( $oldDisable ); + + if ( !is_null( $result ) ) { + return $result; + } + + // Reached the end of the parsable xml without finding an element + // or doctype. Buffer and try again. + $this->parsable = self::PARSABLE_BUFFERING; + $this->xmlParsableBuffer = $content; + return false; + } + /** When we hit a closing element in MODE_IGNORE * Check to see if this is the element we started to ignore, * in which case we get out of MODE_IGNORE |