(X)HTML parser
 * Based on work by Jan Hidders and Magnus Manske
 * To use, set
 *    $wgUseXMLparser = true ;
 *    $wgEnableParserCache = false ;
 *    $wgWiki2xml to the path and executable of the command line version (cli)
 * in LocalSettings.php
 * @package MediaWiki
 * @subpackage Experimental
 */

/**
 * The base class for an element (node) of the wikiXML tree.
 *
 * A node has an upper-case name (e.g. 'PARAGRAPH', 'LINK', 'ATTRS'), a map of
 * XML attributes keyed by upper-case attribute name (e.g. 'NAME', 'LEVEL',
 * 'TYPE') and a list of children. Each child is either a plain string (text)
 * or another element.
 *
 * @package MediaWiki
 * @subpackage Experimental
 */
class element {
	var $name = '';
	var $attrs = array ();
	var $children = array ();

	/**
	 * Finds the ATTRS child element and returns its ATTR sub-children as a
	 * single attribute string. If there are several ATTRS children, the last
	 * one wins.
	 *
	 * @return string Attribute string, or '' when there is no ATTRS child
	 */
	function getSourceAttrs() {
		# makeXHTML() takes the parser by reference, but its ATTRS branch
		# returns before the parser is ever touched, so a placeholder is enough.
		$parser = null;
		$ret = '';
		foreach ($this->children as $child) {
			if (!is_string($child) AND $child->name == 'ATTRS') {
				$ret = $child->makeXHTML($parser);
			}
		}
		return $ret;
	}

	/**
	 * Collects the ATTR children of this (ATTRS) element for getSourceAttrs().
	 *
	 * Each ATTR child contributes NAME='value', where the value is the first
	 * child of the ATTR element when that child is a string, and '' otherwise.
	 * ATTR children without a NAME attribute are skipped.
	 *
	 * @return string The attributes joined by single spaces
	 */
	function getTheseAttrs() {
		$ret = array ();
		foreach ($this->children as $child) {
			if (is_string($child) OR $child->name != 'ATTR') {
				continue;
			}
			if (!isset ($child->attrs['NAME'])) {
				continue;
			}
			$value = '';
			if (isset ($child->children[0]) AND is_string($child->children[0])) {
				$value = $child->children[0];
			}
			# The value is wrapped in single quotes below, so a raw single
			# quote would terminate the attribute early; encode it.
			$value = str_replace("'", '&#039;', $value);
			$ret[] = $child->attrs['NAME']."='".$value."'";
		}
		return implode(' ', $ret);
	}

	/**
	 * Moves a "link tail" into the preceding link.
	 *
	 * When child $key is a LINK element and child $key + 1 is a string, the
	 * leading run of lower-case letters (a-z, ä, ö, ü, ß) is cut off that
	 * string and appended to the link: to the last child of the LINK when it
	 * has more than one child, otherwise to a newly created LINKOPTION child
	 * holding the rendered link content followed by the tail.
	 *
	 * @param object $parser Parser, handed on to sub_makeXHTML()
	 * @param int    $key    Index into $this->children of the candidate LINK
	 */
	function fixLinkTails(& $parser, $key) {
		$k2 = $key +1;
		if (!isset ($this->children[$k2]) OR !is_string($this->children[$k2])) {
			return;
		}
		if (is_string($this->children[$key])) {
			return;
		}
		if ($this->children[$key]->name != "LINK") {
			return;
		}

		$n = $this->children[$k2];
		$s = '';
		# Byte-level alternation (no /u modifier): the umlauts are matched as
		# whole byte sequences, so this works for multi-byte encodings where a
		# comparison against the single byte $n[0] could never succeed.
		if (preg_match('/^(?:[a-z]|ä|ö|ü|ß)+/', $n, $m)) {
			$s = $m[0];
			$n = (string) substr($n, strlen($s));
		}
		$this->children[$k2] = $n;

		if (count($this->children[$key]->children) > 1) {
			# Append the tail to the last child of the link
			$kl = array_keys($this->children[$key]->children);
			$kl = array_pop($kl);
			$this->children[$key]->children[$kl]->children[] = $s;
		} else {
			# Only a target so far: add a display option "target + tail"
			$e = new element;
			$e->name = "LINKOPTION";
			$t = $this->children[$key]->sub_makeXHTML($parser);
			$e->children[] = trim($t).$s;
			$this->children[$key]->children[] = $e;
		}
	}

	/**
	 * Generates the XHTML for the entire subtree.
	 *
	 * String children are emitted as they are, ATTRS children are skipped
	 * (they are rendered into the opening tag instead) and every other child
	 * is rendered through makeXHTML().
	 *
	 * @param object $parser Parser, handed on to the children
	 * @param string $tag    Tag to wrap the output in; '' for no wrapper
	 * @param string $attr   Extra attributes for the opening tag; the
	 *                       attributes from the ATTRS child are appended
	 * @return string The XHTML; a wrapped result ends with a newline
	 */
	function sub_makeXHTML(& $parser, $tag = '', $attr = '') {
		$ret = '';

		$attr2 = $this->getSourceAttrs();
		if ($attr != '' AND $attr2 != '') {
			$attr .= ' ';
		}
		$attr .= $attr2;

		if ($tag != '') {
			$ret .= '<'.$tag;
			if ($attr != '') {
				$ret .= ' '.$attr;
			}
			$ret .= '>';
		}

		# THIS SHOULD BE DONE IN THE WIKI2XML-PARSER INSTEAD
		#		foreach ( array_keys ( $this->children ) AS $x )
		#			$this->fixLinkTails ( $parser , $x ) ;

		foreach ($this->children as $child) {
			if (is_string($child)) {
				$ret .= $child;
			} elseif ($child->name != 'ATTRS') {
				$ret .= $child->makeXHTML($parser);
			}
		}

		if ($tag != '') {
			$ret .= '</'.$tag.">\n";
		}
		return $ret;
	}

	/**
	 * Link functions
	 */

	/**
	 * Creates the XHTML for an internal link.
	 *
	 * A target that contains a ':' (and is not a forced link) and resolves to
	 * the image namespace is handed to $parser->makeImage(); everything else
	 * becomes a normal skin link.
	 *
	 * @param object $parser        Parser, used for makeImage()
	 * @param string $target        Link target
	 * @param string $display_title Text to display for the link
	 * @param array  $options       Remaining link options
	 * @return string The XHTML for the link
	 */
	function createInternalLink(& $parser, $target, $display_title, $options) {
		global $wgUser;
		$skin = $wgUser->getSkin();
		$tp = explode(':', $target); # tp = target parts
		$language = ''; # The language/meta/etc. part
		$namespace = ''; # The namespace, if any
		$nt = Title :: newFromText($target);
		$fl = isset ($this->attrs['FORCEDLINK']) && strtoupper($this->attrs['FORCEDLINK']) == 'YES';

		if (!$fl && count($tp) > 1) {
			# There's stuff missing here...
			# $nt is not an object when the target is not a valid title
			if (is_object($nt) && $nt->getNamespace() == NS_IMAGE) {
				$options[] = $display_title;
				return $parser->makeImage($nt, implode('|', $options));
			}
		}

		# $language and $namespace are never filled in yet, so the first two
		# branches are placeholders that cannot currently be reached.
		if ($language != '') { # External link within the WikiMedia project
			return "{language link}";
		}
		if ($namespace != '') { # Link to another namespace, check for image/media stuff
			return "{namespace link}";
		}
		return $skin->makeLink($target, $display_title);
	}

	/**
	 * Renders this LINK element: the LINKTARGET child gives the target, every
	 * other element child is an option, and the last option is used as the
	 * display title (the target itself when there are no options).
	 *
	 * @param object $parser Parser, handed on to the children
	 * @return string The XHTML for the link
	 */
	function makeInternalLink(& $parser) {
		$target = '';
		$option = array ();
		foreach ($this->children as $child) {
			if (is_string($child)) {
				# This shouldn't be the case!
				continue;
			}
			if ($child->name == 'LINKTARGET') {
				$target = trim($child->makeXHTML($parser));
			} else {
				$option[] = trim($child->makeXHTML($parser));
			}
		}

		if (count($option) == 0) {
			$option[] = $target; # Create dummy display title
		}
		$display_title = array_pop($option);
		return $this->createInternalLink($parser, $target, $display_title, $option);
	}

	/**
	 * Fetches a template page and passes its text through
	 * $parser->plain_parse() together with the template variables.
	 *
	 * @param string $title  Template title; the template namespace is
	 *                       prepended when the title contains no ':'
	 * @param array  $parts  Template arguments, either "value" (keyed by its
	 *                       1-based position) or "key=value"
	 * @param object $parser Parser providing plain_parse()
	 * @return string The template text, or a broken link when the page does
	 *                not exist or the title is invalid
	 */
	function getTemplateXHTML($title, $parts, & $parser) {
		global $wgLang, $wgUser;
		$skin = $wgUser->getSkin();
		$ot = $title; # Original title
		if (strpos($title, ':') === false) {
			$title = $wgLang->getNsText(NS_TEMPLATE).":".$title;
		}
		$nt = Title :: newFromText($title);
		# An invalid title is treated like a page that does not exist
		$id = is_object($nt) ? $nt->getArticleID() : 0;
		if ($id == 0) { # No/non-existing page
			return $skin->makeBrokenLink($title, $ot);
		}

		$a = 0;
		$tv = array (); # Template variables
		foreach ($parts AS $part) {
			$a ++;
			$x = explode('=', $part, 2);
			if (count($x) == 1) {
				$key = "{$a}";
			} else {
				$key = $x[0];
			}
			$value = array_pop($x);
			$tv[$key] = $value;
		}

		$art = new Article($nt);
		$text = $art->getContent(false);
		# NOTE(review): $text is returned afterwards, so plain_parse() presumably
		# takes it by reference - confirm against the parser class.
		$parser->plain_parse($text, true, $tv);
		return $text;
	}

	/**
	 * This function actually converts wikiXML into XHTML tags.
	 *
	 * EXTENSION elements naming an allowed HTML tag are first mapped onto the
	 * equivalent wiki element; afterwards the element is rendered according
	 * to its name. Except for ATTRS, which returns the bare attribute string,
	 * the result is wrapped in newlines with "\n\n" collapsed to "\n".
	 *
	 * @param object $parser Parser; its nowiki counter, mListType and
	 *                       mCurrentTemplateOptions members are used
	 * @return string The XHTML for this element and its subtree
	 */
	function makeXHTML(& $parser) {
		$ret = '';
		$n = $this->name; # Shortcut

		if ($n == 'EXTENSION') { # Fix allowed HTML
			$htmlToWiki = array (
				'B' => 'BOLD',
				'STRONG' => 'BOLD',
				'I' => 'ITALICS',
				'EM' => 'ITALICS',
				'U' => 'UNDERLINED', # Hey, virtual wiki tag! ;-)
				'S' => 'STRIKE',
				'P' => 'PARAGRAPH',
				'TABLE' => 'TABLE',
				'TR' => 'TABLEROW',
				'TD' => 'TABLECELL',
				'TH' => 'TABLEHEAD',
				'CAPTION' => 'CAPTION',
				'NOWIKI' => 'NOWIKI',
			);
			$ext = isset ($this->attrs['NAME']) ? strtoupper($this->attrs['NAME']) : '';
			if (isset ($htmlToWiki[$ext])) {
				$n = $htmlToWiki[$ext];
				unset ($this->attrs['NAME']); # Cleanup
			} elseif ($parser->nowiki > 0) {
				# No 'real' wiki tags allowed in nowiki section
				# NOTE(review): an empty name ends up in the default branch
				# below and is rendered as "<>...</>" - confirm this is intended.
				$n = '';
			}
		} // $n = 'EXTENSION'

		switch($n) {
			case 'ARTICLE':
				$ret .= $this->sub_makeXHTML($parser);
				break;
			case 'HEADING':
				$ret .= $this->sub_makeXHTML($parser, 'h'.$this->attrs['LEVEL']);
				break;
			case 'PARAGRAPH':
				$ret .= $this->sub_makeXHTML($parser, 'p');
				break;
			case 'BOLD':
				$ret .= $this->sub_makeXHTML($parser, 'strong');
				break;
			case 'ITALICS':
				$ret .= $this->sub_makeXHTML($parser, 'em');
				break;

			# These don't exist as wiki markup
			case 'UNDERLINED':
				$ret .= $this->sub_makeXHTML($parser, 'u');
				break;
			case 'STRIKE':
				$ret .= $this->sub_makeXHTML($parser, 'strike');
				break;

			# HTML comment
			case 'COMMENT': # Comments are parsed out
				$ret .= '';
				break;

			# Links
			case 'LINK':
				$ret .= $this->makeInternalLink($parser);
				break;
			case 'LINKTARGET':
			case 'LINKOPTION':
				$ret .= $this->sub_makeXHTML($parser);
				break;

			case 'TEMPLATE':
				$parts = $this->sub_makeXHTML($parser);
				$parts = explode('|', $parts);
				$title = array_shift($parts);
				# getTemplateXHTML() declares $parser by reference itself;
				# call-time pass-by-reference is a fatal error since PHP 5.4.
				$ret .= $this->getTemplateXHTML($title, $parts, $parser);
				break;
			case 'TEMPLATEVAR':
				$x = $this->sub_makeXHTML($parser);
				if (isset ($parser->mCurrentTemplateOptions["{$x}"])) {
					$ret .= $parser->mCurrentTemplateOptions["{$x}"];
				}
				break;

			# Internal use, not generated by wiki2xml parser
			case 'IGNORE':
				$ret .= $this->sub_makeXHTML($parser);
				break;
			case 'NOWIKI':
				# The counter marks a nowiki section while the children render
				$parser->nowiki++;
				$ret .= $this->sub_makeXHTML($parser, '');
				$parser->nowiki--;
				break;

			# Unknown HTML extension
			case 'EXTENSION': # This is currently a dummy!!!
				$ext = isset ($this->attrs['NAME']) ? $this->attrs['NAME'] : '';
				$ret .= '<'.$ext.'>';
				$ret .= $this->sub_makeXHTML($parser);
				$ret .= '</'.$ext.'> ';
				break;

			# Table stuff
			case 'TABLE':
				$ret .= $this->sub_makeXHTML($parser, 'table');
				break;
			case 'TABLEROW':
				$ret .= $this->sub_makeXHTML($parser, 'tr');
				break;
			case 'TABLECELL':
				$ret .= $this->sub_makeXHTML($parser, 'td');
				break;
			case 'TABLEHEAD':
				$ret .= $this->sub_makeXHTML($parser, 'th');
				break;
			case 'CAPTION':
				$ret .= $this->sub_makeXHTML($parser, 'caption');
				break;

			case 'ATTRS': # SPECIAL CASE : returning attributes
				return $this->getTheseAttrs();

			# Lists stuff
			case 'LISTITEM':
				if ($parser->mListType == 'dl') {
					$ret .= $this->sub_makeXHTML($parser, 'dd');
				} else {
					$ret .= $this->sub_makeXHTML($parser, 'li');
				}
				break;
			case 'LIST':
				$listKind = isset ($this->attrs['TYPE']) ? $this->attrs['TYPE'] : '';
				$type = 'ol'; # Default
				if ($listKind == 'bullet') {
					$type = 'ul';
				} elseif ($listKind == 'indent') {
					$type = 'dl';
				}
				# The list type is swapped in for the subtree and restored
				# afterwards, so LISTITEM always sees its enclosing list.
				$oldtype = $parser->mListType;
				$parser->mListType = $type;
				$ret .= $this->sub_makeXHTML($parser, $type);
				$parser->mListType = $oldtype;
				break;

			# Something else entirely
			default:
				$ret .= '<'.$n.'>';
				$ret .= $this->sub_makeXHTML($parser);
				$ret .= '</'.$n.'> ';
				break;
		} // switch($n)

		$ret = "\n{$ret}\n";
		$ret = str_replace("\n\n", "\n", $ret);
		return $ret;
	}

	/**
	 * A function for additional debugging output
	 */
	function myPrint() {
		$ret = "