From b9b85843572bf283f48285001e276ba7e61b63f6 Mon Sep 17 00:00:00 2001 From: Pierre Schmitz Date: Sun, 22 Feb 2009 13:37:51 +0100 Subject: updated to MediaWiki 1.14.0 --- includes/diff/Nodes.php | 439 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 439 insertions(+) create mode 100644 includes/diff/Nodes.php (limited to 'includes/diff/Nodes.php') diff --git a/includes/diff/Nodes.php b/includes/diff/Nodes.php new file mode 100644 index 00000000..1b1363d4 --- /dev/null +++ b/includes/diff/Nodes.php @@ -0,0 +1,439 @@ + + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * or see http://www.gnu.org/ + * + */ + +/** + * Any element in the DOM tree of an HTML document. + * @ingroup DifferenceEngine + */ +class Node { + + public $parent; + + protected $parentTree; + + public $whiteBefore = false; + + public $whiteAfter = false; + + function __construct($parent) { + $this->parent = $parent; + } + + public function getParentTree() { + if (!isset($this->parentTree)) { + if (!is_null($this->parent)) { + $this->parentTree = $this->parent->getParentTree(); + $this->parentTree[] = $this->parent; + } else { + $this->parentTree = array(); + } + } + return $this->parentTree; + } + + public function getLastCommonParent(Node $other) { + $result = new LastCommonParentResult(); + + $myParents = $this->getParentTree(); + $otherParents = $other->getParentTree(); + + $i = 1; + $isSame = true; + $nbMyParents = count($myParents); + $nbOtherParents = count($otherParents); + while ($isSame && $i < $nbMyParents && $i < $nbOtherParents) { + if (!$myParents[$i]->openingTag === $otherParents[$i]->openingTag) { + $isSame = false; + } else { + // After a while, the index i-1 must be the last common parent + $i++; + } + } + + $result->lastCommonParentDepth = $i - 1; + $result->parent = $myParents[$i - 1]; + + if (!$isSame || $nbMyParents > $nbOtherParents) { + // Not all tags matched, or all tags matched but + // there are tags left in this tree + $result->indexInLastCommonParent = $myParents[$i - 1]->getIndexOf($myParents[$i]); + $result->splittingNeeded = true; + } else if ($nbMyParents <= $nbOtherParents) { + $result->indexInLastCommonParent = $myParents[$i - 1]->getIndexOf($this); + } + return $result; + } + + public function setParent($parent) { + $this->parent = $parent; + unset($this->parentTree); + } + + public function inPre() { + $tree = $this->getParentTree(); + foreach ($tree as &$ancestor) { + if ($ancestor->isPre()) { + return true; + } + } + return false; + } +} + +/** + * Node that can contain other nodes. Represents an HTML tag. + * @ingroup DifferenceEngine + */ +class TagNode extends Node { + + public $children = array(); + + public $qName; + + public $attributes = array(); + + public $openingTag; + + function __construct($parent, $qName, /*array*/ $attributes) { + parent::__construct($parent); + $this->qName = strtolower($qName); + foreach($attributes as $key => &$value){ + $this->attributes[strtolower($key)] = $value; + } + return $this->openingTag = Xml::openElement($this->qName, $this->attributes); + } + + public function addChildAbsolute(Node $node, $index) { + array_splice($this->children, $index, 0, array($node)); + } + + public function getIndexOf(Node $child) { + // don't trust array_search with objects + foreach ($this->children as $key => &$value){ + if ($value === $child) { + return $key; + } + } + return null; + } + + public function getNbChildren() { + return count($this->children); + } + + public function getMinimalDeletedSet($id, &$allDeleted, &$somethingDeleted) { + $nodes = array(); + + $allDeleted = false; + $somethingDeleted = false; + $hasNonDeletedDescendant = false; + + if (empty($this->children)) { + return $nodes; + } + + foreach ($this->children as &$child) { + $allDeleted_local = false; + $somethingDeleted_local = false; + $childrenChildren = $child->getMinimalDeletedSet($id, $allDeleted_local, $somethingDeleted_local); + if ($somethingDeleted_local) { + $nodes = array_merge($nodes, $childrenChildren); + $somethingDeleted = true; + } + if (!$allDeleted_local) { + $hasNonDeletedDescendant = true; + } + } + if (!$hasNonDeletedDescendant) { + $nodes = array($this); + $allDeleted = true; + } + return $nodes; + } + + public function splitUntil(TagNode $parent, Node $split, $includeLeft) { + $splitOccured = false; + if ($parent !== $this) { + $part1 = new TagNode(null, $this->qName, $this->attributes); + $part2 = new TagNode(null, $this->qName, $this->attributes); + $part1->setParent($this->parent); + $part2->setParent($this->parent); + + $onSplit = false; + $pastSplit = false; + foreach ($this->children as &$child) + { + if ($child === $split) { + $onSplit = true; + } + if(!$pastSplit || ($onSplit && $includeLeft)) { + $child->setParent($part1); + $part1->children[] = $child; + } else { + $child->setParent($part2); + $part2->children[] = $child; + } + if ($onSplit) { + $onSplit = false; + $pastSplit = true; + } + } + $myindexinparent = $this->parent->getIndexOf($this); + if (!empty($part1->children)) { + $this->parent->addChildAbsolute($part1, $myindexinparent); + } + if (!empty($part2->children)) { + $this->parent->addChildAbsolute($part2, $myindexinparent); + } + if (!empty($part1->children) && !empty($part2->children)) { + $splitOccured = true; + } + + $this->parent->removeChild($myindexinparent); + + if ($includeLeft) { + $this->parent->splitUntil($parent, $part1, $includeLeft); + } else { + $this->parent->splitUntil($parent, $part2, $includeLeft); + } + } + return $splitOccured; + + } + + private function removeChild($index) { + unset($this->children[$index]); + $this->children = array_values($this->children); + } + + public static $blocks = array('html', 'body','p','blockquote', 'h1', + 'h2', 'h3', 'h4', 'h5', 'pre', 'div', 'ul', 'ol', 'li', 'table', + 'tbody', 'tr', 'td', 'th', 'br'); + + public function copyTree() { + $newThis = new TagNode(null, $this->qName, $this->attributes); + $newThis->whiteBefore = $this->whiteBefore; + $newThis->whiteAfter = $this->whiteAfter; + foreach ($this->children as &$child) { + $newChild = $child->copyTree(); + $newChild->setParent($newThis); + $newThis->children[] = $newChild; + } + return $newThis; + } + + public function getMatchRatio(TagNode $other) { + $txtComp = new TextOnlyComparator($other); + return $txtComp->getMatchRatio(new TextOnlyComparator($this)); + } + + public function expandWhiteSpace() { + $shift = 0; + $spaceAdded = false; + + $nbOriginalChildren = $this->getNbChildren(); + for ($i = 0; $i < $nbOriginalChildren; ++$i) { + $child = $this->children[$i + $shift]; + + if ($child instanceof TagNode) { + if (!$child->isPre()) { + $child->expandWhiteSpace(); + } + } + if (!$spaceAdded && $child->whiteBefore) { + $ws = new WhiteSpaceNode(null, ' ', $child->getLeftMostChild()); + $ws->setParent($this); + $this->addChildAbsolute($ws,$i + ($shift++)); + } + if ($child->whiteAfter) { + $ws = new WhiteSpaceNode(null, ' ', $child->getRightMostChild()); + $ws->setParent($this); + $this->addChildAbsolute($ws,$i + 1 + ($shift++)); + $spaceAdded = true; + } else { + $spaceAdded = false; + } + + } + } + + public function getLeftMostChild() { + if (empty($this->children)) { + return $this; + } + return $this->children[0]->getLeftMostChild(); + } + + public function getRightMostChild() { + if (empty($this->children)) { + return $this; + } + return $this->children[$this->getNbChildren() - 1]->getRightMostChild(); + } + + public function isPre() { + return 0 == strcasecmp($this->qName,'pre'); + } + + public static function toDiffLine(TagNode $node) { + return $node->openingTag; + } +} + +/** + * Represents a piece of text in the HTML file. + * @ingroup DifferenceEngine + */ +class TextNode extends Node { + + public $text; + + public $modification; + + function __construct($parent, $text) { + parent::__construct($parent); + $this->modification = new Modification(Modification::NONE); + $this->text = $text; + } + + public function copyTree() { + $clone = clone $this; + $clone->setParent(null); + return $clone; + } + + public function getLeftMostChild() { + return $this; + } + + public function getRightMostChild() { + return $this; + } + + public function getMinimalDeletedSet($id, &$allDeleted, &$somethingDeleted) { + if ($this->modification->type == Modification::REMOVED + && $this->modification->id == $id){ + $somethingDeleted = true; + $allDeleted = true; + return array($this); + } + return array(); + } + + public function isSameText($other) { + if (is_null($other) || ! $other instanceof TextNode) { + return false; + } + return str_replace('\n', ' ',$this->text) === str_replace('\n', ' ',$other->text); + } + + public static function toDiffLine(TextNode $node) { + return str_replace('\n', ' ',$node->text); + } +} + +/** + * @todo Document + * @ingroup DifferenceEngine + */ +class WhiteSpaceNode extends TextNode { + + function __construct($parent, $s, Node $like = null) { + parent::__construct($parent, $s); + if(!is_null($like) && $like instanceof TextNode) { + $newModification = clone $like->modification; + $newModification->firstOfID = false; + $this->modification = $newModification; + } + } +} + +/** + * Represents the root of a HTML document. + * @ingroup DifferenceEngine + */ +class BodyNode extends TagNode { + + function __construct() { + parent::__construct(null, 'body', array()); + } + + public function copyTree() { + $newThis = new BodyNode(); + foreach ($this->children as &$child) { + $newChild = $child->copyTree(); + $newChild->setParent($newThis); + $newThis->children[] = $newChild; + } + return $newThis; + } + + public function getMinimalDeletedSet($id, &$allDeleted, &$somethingDeleted) { + $nodes = array(); + foreach ($this->children as &$child) { + $childrenChildren = $child->getMinimalDeletedSet($id, + $allDeleted, $somethingDeleted); + $nodes = array_merge($nodes, $childrenChildren); + } + return $nodes; + } + +} + +/** + * Represents an image in HTML. Even though images do not contain any text they + * are independent visible objects on the page. They are logically a TextNode. + * @ingroup DifferenceEngine + */ +class ImageNode extends TextNode { + + public $attributes; + + function __construct(TagNode $parent, /*array*/ $attrs) { + if(!array_key_exists('src', $attrs)) { + HTMLDiffer::diffDebug( "Image without a source\n" ); + parent::__construct($parent, ''); + }else{ + parent::__construct($parent, '' . strtolower($attrs['src']) . ''); + } + $this->attributes = $attrs; + } + + public function isSameText($other) { + if (is_null($other) || ! $other instanceof ImageNode) { + return false; + } + return $this->text === $other->text; + } + +} + +/** + * No-op node + * @ingroup DifferenceEngine + */ +class DummyNode extends Node { + + function __construct() { + // no op + } + +} -- cgit v1.2.3-54-g00ecf