diff options
author | Luke Shumaker <LukeShu@sbcglobal.net> | 2014-01-28 09:50:25 -0500 |
---|---|---|
committer | Luke Shumaker <LukeShu@sbcglobal.net> | 2014-01-28 09:50:25 -0500 |
commit | 5744df39e15f85c6cc8a9faf8924d77e76d2b216 (patch) | |
tree | a8c8dd40a94d1fa0d5377566aa5548ae55a163da /includes/HtmlFormatter.php | |
parent | 4bb2aeca1d198391ca856aa16c40b8559c68daec (diff) | |
parent | 224b22a051051f6c2e494c3a2fb4adb42898e2d1 (diff) |
Merge branch 'archwiki'
Conflicts:
extensions/FluxBBAuthPlugin.php
extensions/SyntaxHighlight_GeSHi/README
extensions/SyntaxHighlight_GeSHi/SyntaxHighlight_GeSHi.class.php
extensions/SyntaxHighlight_GeSHi/SyntaxHighlight_GeSHi.i18n.php
extensions/SyntaxHighlight_GeSHi/SyntaxHighlight_GeSHi.php
extensions/SyntaxHighlight_GeSHi/geshi/docs/CHANGES
extensions/SyntaxHighlight_GeSHi/geshi/docs/THANKS
extensions/SyntaxHighlight_GeSHi/geshi/docs/TODO
extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/AbstractClass.png
extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/AbstractClass_logo.png
extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/AbstractMethod.png
extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/AbstractPrivateClass.png
extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/AbstractPrivateClass_logo.png
extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/AbstractPrivateMethod.png
extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/Class.png
extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/Class_logo.png
extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/Constant.png
extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/Constructor.png
extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/Destructor.png
extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/Function.png
extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/Global.png
extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/I.png
extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/Index.png
extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/Interface.png
extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/Interface_logo.png
extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/L.png
extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/Lminus.png
extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/Lplus.png
extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/Method.png
extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/Page.png
extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/Page_logo.png
extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/PrivateClass.png
extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/PrivateClass_logo.png
extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/PrivateMethod.png
extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/PrivateVariable.png
extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/StaticMethod.png
extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/StaticVariable.png
extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/T.png
extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/Tminus.png
extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/Tplus.png
extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/Variable.png
extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/blank.png
extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/class_folder.png
extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/file.png
extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/folder.png
extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/function_folder.png
extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/next_button.png
extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/next_button_disabled.png
extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/package.png
extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/package_folder.png
extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/previous_button.png
extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/previous_button_disabled.png
extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/private_class_logo.png
extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/tutorial.png
extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/tutorial_folder.png
extensions/SyntaxHighlight_GeSHi/geshi/docs/api/media/images/up_button.png
extensions/SyntaxHighlight_GeSHi/geshi/docs/geshi-doc.html
extensions/SyntaxHighlight_GeSHi/geshi/docs/geshi-doc.txt
extensions/SyntaxHighlight_GeSHi/geshi/geshi.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/4cs.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/6502acme.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/6502kickass.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/6502tasm.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/68000devpac.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/abap.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/actionscript.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/actionscript3.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/ada.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/algol68.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/apache.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/applescript.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/apt_sources.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/asm.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/asp.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/autoconf.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/autohotkey.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/autoit.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/avisynth.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/awk.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/bascomavr.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/bash.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/basic4gl.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/bf.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/bibtex.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/blitzbasic.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/bnf.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/boo.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/c.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/c_loadrunner.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/c_mac.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/caddcl.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/cadlisp.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/cfdg.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/cfm.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/chaiscript.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/cil.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/clojure.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/cmake.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/cobol.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/coffeescript.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/cpp-qt.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/cpp.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/csharp.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/css.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/cuesheet.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/d.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/dcs.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/delphi.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/diff.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/div.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/dos.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/dot.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/e.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/ecmascript.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/eiffel.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/email.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/epc.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/erlang.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/euphoria.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/f1.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/falcon.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/fo.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/fortran.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/freebasic.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/fsharp.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/gambas.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/gdb.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/genero.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/genie.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/gettext.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/glsl.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/gml.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/gnuplot.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/go.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/groovy.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/gwbasic.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/haskell.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/hicest.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/hq9plus.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/html4strict.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/html5.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/icon.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/idl.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/ini.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/inno.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/intercal.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/io.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/j.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/java.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/java5.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/javascript.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/jquery.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/kixtart.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/klonec.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/klonecpp.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/latex.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/lb.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/lisp.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/llvm.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/locobasic.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/logtalk.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/lolcode.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/lotusformulas.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/lotusscript.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/lscript.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/lsl2.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/lua.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/m68k.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/magiksf.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/make.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/mapbasic.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/matlab.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/mirc.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/mmix.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/modula2.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/modula3.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/mpasm.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/mxml.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/mysql.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/newlisp.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/nsis.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/oberon2.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/objc.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/objeck.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/ocaml-brief.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/ocaml.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/oobas.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/oracle11.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/oracle8.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/oxygene.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/oz.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/pascal.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/pcre.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/per.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/perl.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/perl6.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/pf.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/php-brief.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/php.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/pic16.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/pike.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/pixelbender.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/pli.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/plsql.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/postgresql.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/povray.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/powerbuilder.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/powershell.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/proftpd.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/progress.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/prolog.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/properties.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/providex.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/purebasic.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/pycon.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/python.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/q.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/qbasic.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/rails.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/rebol.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/reg.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/robots.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/rpmspec.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/rsplus.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/ruby.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/sas.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/scala.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/scheme.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/scilab.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/sdlbasic.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/smalltalk.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/smarty.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/sql.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/systemverilog.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/tcl.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/teraterm.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/text.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/thinbasic.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/tsql.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/typoscript.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/unicon.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/uscript.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/vala.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/vb.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/vbnet.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/verilog.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/vhdl.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/vim.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/visualfoxpro.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/visualprolog.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/whitespace.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/whois.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/winbatch.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/xbasic.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/xml.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/xorg_conf.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/xpp.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/yaml.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/z80.php
extensions/SyntaxHighlight_GeSHi/geshi/geshi/zxbasic.php
Diffstat (limited to 'includes/HtmlFormatter.php')
-rw-r--r-- | includes/HtmlFormatter.php | 356 |
1 files changed, 356 insertions, 0 deletions
diff --git a/includes/HtmlFormatter.php b/includes/HtmlFormatter.php new file mode 100644 index 00000000..248a76fe --- /dev/null +++ b/includes/HtmlFormatter.php @@ -0,0 +1,356 @@ +<?php +/** + * Performs transformations of HTML by wrapping around libxml2 and working + * around its countless bugs. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + */ +class HtmlFormatter { + /** + * @var DOMDocument + */ + private $doc; + + private $html; + private $itemsToRemove = array(); + private $elementsToFlatten = array(); + protected $removeMedia = false; + + /** + * Constructor + * + * @param string $html: Text to process + */ + public function __construct( $html ) { + $this->html = $html; + } + + /** + * Turns a chunk of HTML into a proper document + * @param string $html + * @return string + */ + public static function wrapHTML( $html ) { + return '<!doctype html><html><head></head><body>' . $html . '</body></html>'; + } + + /** + * Override this in descendant class to modify HTML after it has been converted from DOM tree + * @param string $html: HTML to process + * @return string: Processed HTML + */ + protected function onHtmlReady( $html ) { + return $html; + } + + /** + * @return DOMDocument: DOM to manipulate + */ + public function getDoc() { + if ( !$this->doc ) { + $html = mb_convert_encoding( $this->html, 'HTML-ENTITIES', 'UTF-8' ); + + // Workaround for bug that caused spaces before references + // to disappear during processing: + // https://bugzilla.wikimedia.org/show_bug.cgi?id=53086 + // + // Please replace with a better fix if one can be found. + $html = str_replace( ' <', ' <', $html ); + + libxml_use_internal_errors( true ); + $loader = libxml_disable_entity_loader(); + $this->doc = new DOMDocument(); + $this->doc->strictErrorChecking = false; + $this->doc->loadHTML( $html ); + libxml_disable_entity_loader( $loader ); + libxml_use_internal_errors( false ); + $this->doc->encoding = 'UTF-8'; + } + return $this->doc; + } + + /** + * Sets whether images/videos/sounds should be removed from output + * @param bool $flag + */ + public function setRemoveMedia( $flag = true ) { + $this->removeMedia = $flag; + } + + /** + * Adds one or more selector of content to remove. A subset of CSS selector + * syntax is supported: + * + * <tag> + * <tag>.class + * .<class> + * #<id> + * + * @param Array|string $selectors: Selector(s) of stuff to remove + */ + public function remove( $selectors ) { + $this->itemsToRemove = array_merge( $this->itemsToRemove, (array)$selectors ); + } + + /** + * Adds one or more element name to the list to flatten (remove tag, but not its content) + * Can accept undelimited regexes + * + * Note this interface may fail in surprising unexpected ways due to usage of regexes, + * so should not be relied on for HTML markup security measures. + * + * @param Array|string $elements: Name(s) of tag(s) to flatten + */ + public function flatten( $elements ) { + $this->elementsToFlatten = array_merge( $this->elementsToFlatten, (array)$elements ); + } + + /** + * Instructs the formatter to flatten all tags + */ + public function flattenAllTags() { + $this->flatten( '[?!]?[a-z0-9]+' ); + } + + /** + * Removes content we've chosen to remove + */ + public function filterContent() { + wfProfileIn( __METHOD__ ); + $removals = $this->parseItemsToRemove(); + + if ( !$removals ) { + wfProfileOut( __METHOD__ ); + return; + } + + $doc = $this->getDoc(); + + // Remove tags + + // You can't remove DOMNodes from a DOMNodeList as you're iterating + // over them in a foreach loop. It will seemingly leave the internal + // iterator on the foreach out of wack and results will be quite + // strange. Though, making a queue of items to remove seems to work. + $domElemsToRemove = array(); + foreach ( $removals['TAG'] as $tagToRemove ) { + $tagToRemoveNodes = $doc->getElementsByTagName( $tagToRemove ); + foreach ( $tagToRemoveNodes as $tagToRemoveNode ) { + if ( $tagToRemoveNode ) { + $domElemsToRemove[] = $tagToRemoveNode; + } + } + } + + $this->removeElements( $domElemsToRemove ); + + // Elements with named IDs + $domElemsToRemove = array(); + foreach ( $removals['ID'] as $itemToRemove ) { + $itemToRemoveNode = $doc->getElementById( $itemToRemove ); + if ( $itemToRemoveNode ) { + $domElemsToRemove[] = $itemToRemoveNode; + } + } + $this->removeElements( $domElemsToRemove ); + + // CSS Classes + $domElemsToRemove = array(); + $xpath = new DOMXpath( $doc ); + foreach ( $removals['CLASS'] as $classToRemove ) { + $elements = $xpath->query( '//*[contains(@class, "' . $classToRemove . '")]' ); + + /** @var $element DOMElement */ + foreach ( $elements as $element ) { + $classes = $element->getAttribute( 'class' ); + if ( preg_match( "/\b$classToRemove\b/", $classes ) && $element->parentNode ) { + $domElemsToRemove[] = $element; + } + } + } + $this->removeElements( $domElemsToRemove ); + + // Tags with CSS Classes + foreach ( $removals['TAG_CLASS'] as $classToRemove ) { + $parts = explode( '.', $classToRemove ); + + $elements = $xpath->query( + '//' . $parts[0] . '[@class="' . $parts[1] . '"]' + ); + + $this->removeElements( $elements ); + } + + wfProfileOut( __METHOD__ ); + } + + /** + * Removes a list of elelments from DOMDocument + * @param array|DOMNodeList $elements + */ + private function removeElements( $elements ) { + $list = $elements; + if ( $elements instanceof DOMNodeList ) { + $list = array(); + foreach ( $elements as $element ) { + $list[] = $element; + } + } + /** @var $element DOMElement */ + foreach ( $list as $element ) { + if ( $element->parentNode ) { + $element->parentNode->removeChild( $element ); + } + } + } + + /** + * libxml in its usual pointlessness converts many chars to entities - this function + * perfoms a reverse conversion + * @param string $html + * @return string + */ + private function fixLibXML( $html ) { + wfProfileIn( __METHOD__ ); + static $replacements; + if ( ! $replacements ) { + // We don't include rules like '"' => '&quot;' because entities had already been + // normalized by libxml. Using this function with input not sanitized by libxml is UNSAFE! + $replacements = new ReplacementArray( array( + '"' => '&quot;', + '&' => '&amp;', + '<' => '&lt;', + '>' => '&gt;', + ) ); + } + $html = $replacements->replace( $html ); + $html = mb_convert_encoding( $html, 'UTF-8', 'HTML-ENTITIES' ); + wfProfileOut( __METHOD__ ); + return $html; + } + + /** + * Performs final transformations and returns resulting HTML + * + * @param DOMElement|string|null $element: ID of element to get HTML from or false to get it from the whole tree + * @return string: Processed HTML + */ + public function getText( $element = null ) { + wfProfileIn( __METHOD__ ); + + if ( $this->doc ) { + if ( $element !== null && !( $element instanceof DOMElement ) ) { + $element = $this->doc->getElementById( $element ); + } + if ( $element ) { + $body = $this->doc->getElementsByTagName( 'body' )->item( 0 ); + $nodesArray = array(); + foreach ( $body->childNodes as $node ) { + $nodesArray[] = $node; + } + foreach ( $nodesArray as $nodeArray ) { + $body->removeChild( $nodeArray ); + } + $body->appendChild( $element ); + } + $html = $this->doc->saveHTML(); + $html = $this->fixLibXml( $html ); + } else { + $html = $this->html; + } + if ( wfIsWindows() ) { + // Appears to be cleanup for CRLF misprocessing of unknown origin + // when running server on Windows platform. + // + // If this error continues in the future, please track it down in the + // XML code paths if possible and fix there. + $html = str_replace( ' ', '', $html ); + } + $html = preg_replace( '/<!--.*?-->|^.*?<body>|<\/body>.*$/s', '', $html ); + $html = $this->onHtmlReady( $html ); + + if ( $this->elementsToFlatten ) { + $elements = implode( '|', $this->elementsToFlatten ); + $html = preg_replace( "#</?($elements)\\b[^>]*>#is", '', $html ); + } + + wfProfileOut( __METHOD__ ); + return $html; + } + + /** + * @param $selector: CSS selector to parse + * @param $type + * @param $rawName + * @return bool: Whether the selector was successfully recognised + */ + protected function parseSelector( $selector, &$type, &$rawName ) { + if ( strpos( $selector, '.' ) === 0 ) { + $type = 'CLASS'; + $rawName = substr( $selector, 1 ); + } elseif ( strpos( $selector, '#' ) === 0 ) { + $type = 'ID'; + $rawName = substr( $selector, 1 ); + } elseif ( strpos( $selector, '.' ) !== 0 && + strpos( $selector, '.' ) !== false ) + { + $type = 'TAG_CLASS'; + $rawName = $selector; + } elseif ( strpos( $selector, '[' ) === false + && strpos( $selector, ']' ) === false ) + { + $type = 'TAG'; + $rawName = $selector; + } else { + throw new MWException( __METHOD__ . "(): unrecognized selector '$selector'" ); + } + + return true; + } + + /** + * Transforms CSS selectors into an internal representation suitable for processing + * @return array + */ + protected function parseItemsToRemove() { + wfProfileIn( __METHOD__ ); + $removals = array( + 'ID' => array(), + 'TAG' => array(), + 'CLASS' => array(), + 'TAG_CLASS' => array(), + ); + + foreach ( $this->itemsToRemove as $itemToRemove ) { + $type = ''; + $rawName = ''; + if ( $this->parseSelector( $itemToRemove, $type, $rawName ) ) { + $removals[$type][] = $rawName; + } + } + + if ( $this->removeMedia ) { + $removals['TAG'][] = 'img'; + $removals['TAG'][] = 'audio'; + $removals['TAG'][] = 'video'; + } + + wfProfileOut( __METHOD__ ); + return $removals; + } +} |