diff options
author | Pierre Schmitz <pierre@archlinux.de> | 2006-10-11 18:12:39 +0000 |
---|---|---|
committer | Pierre Schmitz <pierre@archlinux.de> | 2006-10-11 18:12:39 +0000 |
commit | 183851b06bd6c52f3cae5375f433da720d410447 (patch) | |
tree | a477257decbf3360127f6739c2f9d0ec57a03d39 /includes/cbt |
MediaWiki 1.7.1 wiederhergestellt
Diffstat (limited to 'includes/cbt')
-rw-r--r-- | includes/cbt/CBTCompiler.php | 369 | ||||
-rw-r--r-- | includes/cbt/CBTProcessor.php | 540 | ||||
-rw-r--r-- | includes/cbt/README | 108 |
3 files changed, 1017 insertions, 0 deletions
diff --git a/includes/cbt/CBTCompiler.php b/includes/cbt/CBTCompiler.php new file mode 100644 index 00000000..4ef8ee4a --- /dev/null +++ b/includes/cbt/CBTCompiler.php @@ -0,0 +1,369 @@
<?php

/**
 * This file contains functions to convert callback templates to other languages.
 * The template should first be pre-processed with CBTProcessor to remove static
 * sections.
 */


require_once( dirname( __FILE__ ) . '/CBTProcessor.php' );

/**
 * Push a value onto the stack
 * Argument 1: value
 */
define( 'CBT_PUSH', 1 );

/**
 * Pop, concatenate argument, push
 * Argument 1: value
 */
define( 'CBT_CAT', 2 );

/**
 * Concatenate where the argument is on the stack, instead of immediate
 */
define( 'CBT_CATS', 3 );

/**
 * Call a function, push the return value onto the stack and put it in the cache
 * Argument 1: argument count (the function name itself counts as one entry)
 *
 * The arguments to the function are on the stack
 */
define( 'CBT_CALL', 4 );

/**
 * Pop, htmlspecialchars, push
 */
define( 'CBT_HX', 5 );

/**
 * One instruction of the stack program produced by CBTCompiler:
 * an opcode (one of the CBT_* constants above) plus up to two arguments.
 */
class CBTOp {
	var $opcode;
	var $arg1;
	var $arg2;

	/**
	 * @param $opcode Integer: one of the CBT_* opcode constants
	 * @param $arg1 Mixed: first argument, or null if unused
	 * @param $arg2 Mixed: second argument, or null if unused
	 */
	function __construct( $opcode, $arg1, $arg2 ) {
		$this->opcode = $opcode;
		$this->arg1 = $arg1;
		$this->arg2 = $arg2;
	}

	/**
	 * Legacy PHP 4 style constructor name. A method named after its class is
	 * no longer treated as a constructor by PHP 8, so the real work lives in
	 * __construct(); this is kept for code that calls it explicitly.
	 */
	function CBTOp( $opcode, $arg1, $arg2 ) {
		$this->__construct( $opcode, $arg1, $arg2 );
	}

	/**
	 * Return the mnemonic for this opcode, or 'UNKNOWN' if the opcode is not
	 * one of the CBT_* constants.
	 */
	function name() {
		static $opcodeNames = array(
			CBT_PUSH => 'PUSH',
			CBT_CAT => 'CAT',
			CBT_CATS => 'CATS',
			CBT_CALL => 'CALL',
			CBT_HX => 'HX',
		);
		return isset( $opcodeNames[$this->opcode] ) ? $opcodeNames[$this->opcode] : 'UNKNOWN';
	}
}

/**
 * Translates a callback template into a list of CBTOp instructions
 * ($mOps), from which PHP source code can be generated.
 */
class CBTCompiler {
	var $mOps = array();
	var $mCode;
	var $mText = '';         # The template text being compiled
	var $mLastError = false; # Last error message or false for no error
	var $mErrorPos = 0;      # Offset in $mText of the last error
	var $mCurrentPos = 0;    # Fallback error offset used by error()

	/**
	 * @param $text String: the template text to compile
	 */
	function __construct( $text ) {
		$this->mText = $text;
	}

	/** Legacy PHP 4 style constructor name, see CBTOp::CBTOp() */
	function CBTCompiler( $text ) {
		$this->__construct( $text );
	}

	/**
	 * Compile the text.
+ * Returns true on success, error message on failure + */ + function compile() { + $fname = 'CBTProcessor::compile'; + $this->mLastError = false; + $this->mOps = array(); + + $this->doText( 0, strlen( $this->mText ) ); + + if ( $this->mLastError !== false ) { + $pos = $this->mErrorPos; + + // Find the line number at which the error occurred + $startLine = 0; + $endLine = 0; + $line = 0; + do { + if ( $endLine ) { + $startLine = $endLine + 1; + } + $endLine = strpos( $this->mText, "\n", $startLine ); + ++$line; + } while ( $endLine !== false && $endLine < $pos ); + + $text = "Template error at line $line: $this->mLastError\n<pre>\n"; + + $context = rtrim( str_replace( "\t", " ", substr( $this->mText, $startLine, $endLine - $startLine ) ) ); + $text .= htmlspecialchars( $context ) . "\n" . str_repeat( ' ', $pos - $startLine ) . "^\n</pre>\n"; + } else { + $text = true; + } + + return $text; + } + + /** Shortcut for doOpenText( $start, $end, false */ + function doText( $start, $end ) { + return $this->doOpenText( $start, $end, false ); + } + + function phpQuote( $text ) { + return "'" . strtr( $text, array( "\\" => "\\\\", "'" => "\\'" ) ) . "'"; + } + + function op( $opcode, $arg1 = null, $arg2 = null) { + return new CBTOp( $opcode, $arg1, $arg2 ); + } + + /** + * Recursive workhorse for text mode. + * + * Processes text mode starting from offset $p, until either $end is + * reached or a closing brace is found. If $needClosing is false, a + * closing brace will flag an error, if $needClosing is true, the lack + * of a closing brace will flag an error. + * + * The parameter $p is advanced to the position after the closing brace, + * or after the end. A CBTValue is returned. 
+ * + * @private + */ + function doOpenText( &$p, $end, $needClosing = true ) { + $in =& $this->mText; + $start = $p; + $atStart = true; + + $foundClosing = false; + while ( $p < $end ) { + $matchLength = strcspn( $in, CBT_BRACE, $p, $end - $p ); + $pToken = $p + $matchLength; + + if ( $pToken >= $end ) { + // No more braces, output remainder + if ( $atStart ) { + $this->mOps[] = $this->op( CBT_PUSH, substr( $in, $p ) ); + $atStart = false; + } else { + $this->mOps[] = $this->op( CBT_CAT, substr( $in, $p ) ); + } + $p = $end; + break; + } + + // Output the text before the brace + if ( $atStart ) { + $this->mOps[] = $this->op( CBT_PUSH, substr( $in, $p, $matchLength ) ); + $atStart = false; + } else { + $this->mOps[] = $this->op( CBT_CAT, substr( $in, $p, $matchLength ) ); + } + + // Advance the pointer + $p = $pToken + 1; + + // Check for closing brace + if ( $in[$pToken] == '}' ) { + $foundClosing = true; + break; + } + + // Handle the "{fn}" special case + if ( $pToken > 0 && $in[$pToken-1] == '"' ) { + $this->doOpenFunction( $p, $end ); + if ( $p < $end && $in[$p] == '"' ) { + $this->mOps[] = $this->op( CBT_HX ); + } + } else { + $this->doOpenFunction( $p, $end ); + } + if ( $atStart ) { + $atStart = false; + } else { + $this->mOps[] = $this->op( CBT_CATS ); + } + } + if ( $foundClosing && !$needClosing ) { + $this->error( 'Errant closing brace', $p ); + } elseif ( !$foundClosing && $needClosing ) { + $this->error( 'Unclosed text section', $start ); + } else { + if ( $atStart ) { + $this->mOps[] = $this->op( CBT_PUSH, '' ); + } + } + } + + /** + * Recursive workhorse for function mode. + * + * Processes function mode starting from offset $p, until either $end is + * reached or a closing brace is found. If $needClosing is false, a + * closing brace will flag an error, if $needClosing is true, the lack + * of a closing brace will flag an error. + * + * The parameter $p is advanced to the position after the closing brace, + * or after the end. 
A CBTValue is returned. + * + * @private + */ + function doOpenFunction( &$p, $end, $needClosing = true ) { + $in =& $this->mText; + $start = $p; + $argCount = 0; + + $foundClosing = false; + while ( $p < $end ) { + $char = $in[$p]; + if ( $char == '{' ) { + // Switch to text mode + ++$p; + $tokenStart = $p; + $this->doOpenText( $p, $end ); + ++$argCount; + } elseif ( $char == '}' ) { + // Block end + ++$p; + $foundClosing = true; + break; + } elseif ( false !== strpos( CBT_WHITE, $char ) ) { + // Whitespace + // Consume the rest of the whitespace + $p += strspn( $in, CBT_WHITE, $p, $end - $p ); + } else { + // Token, find the end of it + $tokenLength = strcspn( $in, CBT_DELIM, $p, $end - $p ); + $this->mOps[] = $this->op( CBT_PUSH, substr( $in, $p, $tokenLength ) ); + + // Execute the token as a function if it's not the function name + if ( $argCount ) { + $this->mOps[] = $this->op( CBT_CALL, 1 ); + } + + $p += $tokenLength; + ++$argCount; + } + } + if ( !$foundClosing && $needClosing ) { + $this->error( 'Unclosed function', $start ); + return ''; + } + + $this->mOps[] = $this->op( CBT_CALL, $argCount ); + } + + /** + * Set a flag indicating that an error has been found. + */ + function error( $text, $pos = false ) { + $this->mLastError = $text; + if ( $pos === false ) { + $this->mErrorPos = $this->mCurrentPos; + } else { + $this->mErrorPos = $pos; + } + } + + function getLastError() { + return $this->mLastError; + } + + function opsToString() { + $s = ''; + foreach( $this->mOps as $op ) { + $s .= $op->name(); + if ( !is_null( $op->arg1 ) ) { + $s .= ' ' . var_export( $op->arg1, true ); + } + if ( !is_null( $op->arg2 ) ) { + $s .= ' ' . 
var_export( $op->arg2, true ); + } + $s .= "\n"; + } + return $s; + } + + function generatePHP( $functionObj ) { + $fname = 'CBTCompiler::generatePHP'; + wfProfileIn( $fname ); + $stack = array(); + + foreach( $this->mOps as $index => $op ) { + switch( $op->opcode ) { + case CBT_PUSH: + $stack[] = $this->phpQuote( $op->arg1 ); + break; + case CBT_CAT: + $val = array_pop( $stack ); + array_push( $stack, "$val . " . $this->phpQuote( $op->arg1 ) ); + break; + case CBT_CATS: + $right = array_pop( $stack ); + $left = array_pop( $stack ); + array_push( $stack, "$left . $right" ); + break; + case CBT_CALL: + $args = array_slice( $stack, count( $stack ) - $op->arg1, $op->arg1 ); + $stack = array_slice( $stack, 0, count( $stack ) - $op->arg1 ); + + // Some special optimised expansions + if ( $op->arg1 == 0 ) { + $result = ''; + } else { + $func = array_shift( $args ); + if ( substr( $func, 0, 1 ) == "'" && substr( $func, -1 ) == "'" ) { + $func = substr( $func, 1, strlen( $func ) - 2 ); + if ( $func == "if" ) { + if ( $op->arg1 < 3 ) { + // This should have been caught during processing + return "Not enough arguments to if"; + } elseif ( $op->arg1 == 3 ) { + $result = "(({$args[0]} != '') ? ({$args[1]}) : '')"; + } else { + $result = "(({$args[0]} != '') ? ({$args[1]}) : ({$args[2]}))"; + } + } elseif ( $func == "true" ) { + $result = "true"; + } elseif( $func == "lbrace" || $func == "{" ) { + $result = "{"; + } elseif( $func == "rbrace" || $func == "}" ) { + $result = "}"; + } elseif ( $func == "escape" || $func == "~" ) { + $result = "htmlspecialchars({$args[0]})"; + } else { + // Known function name + $result = "{$functionObj}->{$func}(" . implode( ', ', $args ) . ')'; + } + } else { + // Unknown function name + $result = "call_user_func(array($functionObj, $func), " . implode( ', ', $args ) . 
' )'; + } + } + array_push( $stack, $result ); + break; + case CBT_HX: + $val = array_pop( $stack ); + array_push( $stack, "htmlspecialchars( $val )" ); + break; + default: + return "Unknown opcode {$op->opcode}\n"; + } + } + wfProfileOut( $fname ); + if ( count( $stack ) !== 1 ) { + return "Error, stack count incorrect\n"; + } + return ' + global $cbtExecutingGenerated; + ++$cbtExecutingGenerated; + $output = ' . $stack[0] . '; + --$cbtExecutingGenerated; + return $output; + '; + } +} +?> diff --git a/includes/cbt/CBTProcessor.php b/includes/cbt/CBTProcessor.php new file mode 100644 index 00000000..0c34204e --- /dev/null +++ b/includes/cbt/CBTProcessor.php @@ -0,0 +1,540 @@ +<?php + +/** + * PHP version of the callback template processor + * This is currently used as a test rig and is likely to be used for + * compatibility purposes later, where the C++ extension is not available. + */ + +define( 'CBT_WHITE', " \t\r\n" ); +define( 'CBT_BRACE', '{}' ); +define( 'CBT_DELIM', CBT_WHITE . CBT_BRACE ); +define( 'CBT_DEBUG', 0 ); + +$GLOBALS['cbtExecutingGenerated'] = 0; + +/** + * Attempting to be a MediaWiki-independent module + */ +if ( !function_exists( 'wfProfileIn' ) ) { + function wfProfileIn() {} +} +if ( !function_exists( 'wfProfileOut' ) ) { + function wfProfileOut() {} +} + +/** + * Escape text for inclusion in template + */ +function cbt_escape( $text ) { + return strtr( $text, array( '{' => '{[}', '}' => '{]}' ) ); +} + +/** + * Create a CBTValue + */ +function cbt_value( $text = '', $deps = array(), $isTemplate = false ) { + global $cbtExecutingGenerated; + if ( $cbtExecutingGenerated ) { + return $text; + } else { + return new CBTValue( $text, $deps, $isTemplate ); + } +} + +/** + * A dependency-tracking value class + * Callback functions should return one of these, unless they have + * no dependencies in which case they can return a string. 
+ */ +class CBTValue { + var $mText, $mDeps, $mIsTemplate; + + /** + * Create a new value + * @param $text String: , default ''. + * @param $deps Array: what this value depends on + * @param $isTemplate Bool: whether the result needs compilation/execution, default 'false'. + */ + function CBTValue( $text = '', $deps = array(), $isTemplate = false ) { + $this->mText = $text; + if ( !is_array( $deps ) ) { + $this->mDeps = array( $deps ) ; + } else { + $this->mDeps = $deps; + } + $this->mIsTemplate = $isTemplate; + } + + /** Concatenate two values, merging their dependencies */ + function cat( $val ) { + if ( is_object( $val ) ) { + $this->addDeps( $val ); + $this->mText .= $val->mText; + } else { + $this->mText .= $val; + } + } + + /** Add the dependencies of another value to this one */ + function addDeps( $values ) { + if ( !is_array( $values ) ) { + $this->mDeps = array_merge( $this->mDeps, $values->mDeps ); + } else { + foreach ( $values as $val ) { + if ( !is_object( $val ) ) { + var_dump( debug_backtrace() ); + exit; + } + $this->mDeps = array_merge( $this->mDeps, $val->mDeps ); + } + } + } + + /** Remove a list of dependencies */ + function removeDeps( $deps ) { + $this->mDeps = array_diff( $this->mDeps, $deps ); + } + + function setText( $text ) { + $this->mText = $text; + } + + function getText() { + return $this->mText; + } + + function getDeps() { + return $this->mDeps; + } + + /** If the value is a template, execute it */ + function execute( &$processor ) { + if ( $this->mIsTemplate ) { + $myProcessor = new CBTProcessor( $this->mText, $processor->mFunctionObj, $processor->mIgnorableDeps ); + $myProcessor->mCompiling = $processor->mCompiling; + $val = $myProcessor->doText( 0, strlen( $this->mText ) ); + if ( $myProcessor->getLastError() ) { + $processor->error( $myProcessor->getLastError() ); + $this->mText = ''; + } else { + $this->mText = $val->mText; + $this->addDeps( $val ); + } + if ( !$processor->mCompiling ) { + $this->mIsTemplate = false; + } + } + 
} + + /** If the value is plain text, escape it for inclusion in a template */ + function templateEscape() { + if ( !$this->mIsTemplate ) { + $this->mText = cbt_escape( $this->mText ); + } + } + + /** Return true if the value has no dependencies */ + function isStatic() { + return count( $this->mDeps ) == 0; + } +} + +/** + * Template processor, for compilation and execution + */ +class CBTProcessor { + var $mText, # The text being processed + $mFunctionObj, # The object containing callback functions + $mCompiling = false, # True if compiling to a template, false if executing to text + $mIgnorableDeps = array(), # Dependency names which should be treated as static + $mFunctionCache = array(), # A cache of function results keyed by argument hash + $mLastError = false, # Last error message or false for no error + $mErrorPos = 0, # Last error position + + /** Built-in functions */ + $mBuiltins = array( + 'if' => 'bi_if', + 'true' => 'bi_true', + '[' => 'bi_lbrace', + 'lbrace' => 'bi_lbrace', + ']' => 'bi_rbrace', + 'rbrace' => 'bi_rbrace', + 'escape' => 'bi_escape', + '~' => 'bi_escape', + ); + + /** + * Create a template processor for a given text, callback object and static dependency list + */ + function CBTProcessor( $text, $functionObj, $ignorableDeps = array() ) { + $this->mText = $text; + $this->mFunctionObj = $functionObj; + $this->mIgnorableDeps = $ignorableDeps; + } + + /** + * Execute the template. + * If $compile is true, produces an optimised template where functions with static + * dependencies have been replaced by their return values. 
+ */ + function execute( $compile = false ) { + $fname = 'CBTProcessor::execute'; + wfProfileIn( $fname ); + $this->mCompiling = $compile; + $this->mLastError = false; + $val = $this->doText( 0, strlen( $this->mText ) ); + $text = $val->getText(); + if ( $this->mLastError !== false ) { + $pos = $this->mErrorPos; + + // Find the line number at which the error occurred + $startLine = 0; + $endLine = 0; + $line = 0; + do { + if ( $endLine ) { + $startLine = $endLine + 1; + } + $endLine = strpos( $this->mText, "\n", $startLine ); + ++$line; + } while ( $endLine !== false && $endLine < $pos ); + + $text = "Template error at line $line: $this->mLastError\n<pre>\n"; + + $context = rtrim( str_replace( "\t", " ", substr( $this->mText, $startLine, $endLine - $startLine ) ) ); + $text .= htmlspecialchars( $context ) . "\n" . str_repeat( ' ', $pos - $startLine ) . "^\n</pre>\n"; + } + wfProfileOut( $fname ); + return $text; + } + + /** Shortcut for execute(true) */ + function compile() { + $fname = 'CBTProcessor::compile'; + wfProfileIn( $fname ); + $s = $this->execute( true ); + wfProfileOut( $fname ); + return $s; + } + + /** Shortcut for doOpenText( $start, $end, false */ + function doText( $start, $end ) { + return $this->doOpenText( $start, $end, false ); + } + + /** + * Escape text for a template if we are producing a template. Do nothing + * if we are producing plain text. + */ + function templateEscape( $text ) { + if ( $this->mCompiling ) { + return cbt_escape( $text ); + } else { + return $text; + } + } + + /** + * Recursive workhorse for text mode. + * + * Processes text mode starting from offset $p, until either $end is + * reached or a closing brace is found. If $needClosing is false, a + * closing brace will flag an error, if $needClosing is true, the lack + * of a closing brace will flag an error. + * + * The parameter $p is advanced to the position after the closing brace, + * or after the end. A CBTValue is returned. 
+ * + * @private + */ + function doOpenText( &$p, $end, $needClosing = true ) { + $fname = 'CBTProcessor::doOpenText'; + wfProfileIn( $fname ); + $in =& $this->mText; + $start = $p; + $ret = new CBTValue( '', array(), $this->mCompiling ); + + $foundClosing = false; + while ( $p < $end ) { + $matchLength = strcspn( $in, CBT_BRACE, $p, $end - $p ); + $pToken = $p + $matchLength; + + if ( $pToken >= $end ) { + // No more braces, output remainder + $ret->cat( substr( $in, $p ) ); + $p = $end; + break; + } + + // Output the text before the brace + $ret->cat( substr( $in, $p, $matchLength ) ); + + // Advance the pointer + $p = $pToken + 1; + + // Check for closing brace + if ( $in[$pToken] == '}' ) { + $foundClosing = true; + break; + } + + // Handle the "{fn}" special case + if ( $pToken > 0 && $in[$pToken-1] == '"' ) { + wfProfileOut( $fname ); + $val = $this->doOpenFunction( $p, $end ); + wfProfileIn( $fname ); + if ( $p < $end && $in[$p] == '"' ) { + $val->setText( htmlspecialchars( $val->getText() ) ); + } + $ret->cat( $val ); + } else { + // Process the function mode component + wfProfileOut( $fname ); + $ret->cat( $this->doOpenFunction( $p, $end ) ); + wfProfileIn( $fname ); + } + } + if ( $foundClosing && !$needClosing ) { + $this->error( 'Errant closing brace', $p ); + } elseif ( !$foundClosing && $needClosing ) { + $this->error( 'Unclosed text section', $start ); + } + wfProfileOut( $fname ); + return $ret; + } + + /** + * Recursive workhorse for function mode. + * + * Processes function mode starting from offset $p, until either $end is + * reached or a closing brace is found. If $needClosing is false, a + * closing brace will flag an error, if $needClosing is true, the lack + * of a closing brace will flag an error. + * + * The parameter $p is advanced to the position after the closing brace, + * or after the end. A CBTValue is returned. 
+ * + * @private + */ + function doOpenFunction( &$p, $end, $needClosing = true ) { + $in =& $this->mText; + $start = $p; + $tokens = array(); + $unexecutedTokens = array(); + + $foundClosing = false; + while ( $p < $end ) { + $char = $in[$p]; + if ( $char == '{' ) { + // Switch to text mode + ++$p; + $tokenStart = $p; + $token = $this->doOpenText( $p, $end ); + $tokens[] = $token; + $unexecutedTokens[] = '{' . substr( $in, $tokenStart, $p - $tokenStart - 1 ) . '}'; + } elseif ( $char == '}' ) { + // Block end + ++$p; + $foundClosing = true; + break; + } elseif ( false !== strpos( CBT_WHITE, $char ) ) { + // Whitespace + // Consume the rest of the whitespace + $p += strspn( $in, CBT_WHITE, $p, $end - $p ); + } else { + // Token, find the end of it + $tokenLength = strcspn( $in, CBT_DELIM, $p, $end - $p ); + $token = new CBTValue( substr( $in, $p, $tokenLength ) ); + // Execute the token as a function if it's not the function name + if ( count( $tokens ) ) { + $tokens[] = $this->doFunction( array( $token ), $p ); + } else { + $tokens[] = $token; + } + $unexecutedTokens[] = $token->getText(); + + $p += $tokenLength; + } + } + if ( !$foundClosing && $needClosing ) { + $this->error( 'Unclosed function', $start ); + return ''; + } + + $val = $this->doFunction( $tokens, $start ); + if ( $this->mCompiling && !$val->isStatic() ) { + $compiled = ''; + $first = true; + foreach( $tokens as $i => $token ) { + if ( $first ) { + $first = false; + } else { + $compiled .= ' '; + } + if ( $token->isStatic() ) { + if ( $i !== 0 ) { + $compiled .= '{' . $token->getText() . '}'; + } else { + $compiled .= $token->getText(); + } + } else { + $compiled .= $unexecutedTokens[$i]; + } + } + + // The dynamic parts of the string are still represented as functions, and + // function invocations have no dependencies. Thus the compiled result has + // no dependencies. 
+ $val = new CBTValue( "{{$compiled}}", array(), true ); + } + return $val; + } + + /** + * Execute a function, caching and returning the result value. + * $tokens is an array of CBTValue objects. $tokens[0] is the function + * name, the others are arguments. $p is the string position, and is used + * for error messages only. + */ + function doFunction( $tokens, $p ) { + if ( count( $tokens ) == 0 ) { + return new CBTValue; + } + $fname = 'CBTProcessor::doFunction'; + wfProfileIn( $fname ); + + $ret = new CBTValue; + + // All functions implicitly depend on their arguments, and the function name + // While this is not strictly necessary for all functions, it's true almost + // all the time and so convenient to do automatically. + $ret->addDeps( $tokens ); + + $this->mCurrentPos = $p; + $func = array_shift( $tokens ); + $func = $func->getText(); + + // Extract the text component from all the tokens + // And convert any templates to plain text + $textArgs = array(); + foreach ( $tokens as $token ) { + $token->execute( $this ); + $textArgs[] = $token->getText(); + } + + // Try the local cache + $cacheKey = $func . "\n" . implode( "\n", $textArgs ); + if ( isset( $this->mFunctionCache[$cacheKey] ) ) { + $val = $this->mFunctionCache[$cacheKey]; + } elseif ( isset( $this->mBuiltins[$func] ) ) { + $func = $this->mBuiltins[$func]; + $val = call_user_func_array( array( &$this, $func ), $tokens ); + $this->mFunctionCache[$cacheKey] = $val; + } elseif ( method_exists( $this->mFunctionObj, $func ) ) { + $profName = get_class( $this->mFunctionObj ) . '::' . 
$func; + wfProfileIn( "$fname-callback" ); + wfProfileIn( $profName ); + $val = call_user_func_array( array( &$this->mFunctionObj, $func ), $textArgs ); + wfProfileOut( $profName ); + wfProfileOut( "$fname-callback" ); + $this->mFunctionCache[$cacheKey] = $val; + } else { + $this->error( "Call of undefined function \"$func\"", $p ); + $val = new CBTValue; + } + if ( !is_object( $val ) ) { + $val = new CBTValue((string)$val); + } + + if ( CBT_DEBUG ) { + $unexpanded = $val; + } + + // If the output was a template, execute it + $val->execute( $this ); + + if ( $this->mCompiling ) { + // Escape any braces so that the output will be a valid template + $val->templateEscape(); + } + $val->removeDeps( $this->mIgnorableDeps ); + $ret->addDeps( $val ); + $ret->setText( $val->getText() ); + + if ( CBT_DEBUG ) { + wfDebug( "doFunction $func args = " + . var_export( $tokens, true ) + . "unexpanded return = " + . var_export( $unexpanded, true ) + . "expanded return = " + . var_export( $ret, true ) + ); + } + + wfProfileOut( $fname ); + return $ret; + } + + /** + * Set a flag indicating that an error has been found. 
+ */ + function error( $text, $pos = false ) { + $this->mLastError = $text; + if ( $pos === false ) { + $this->mErrorPos = $this->mCurrentPos; + } else { + $this->mErrorPos = $pos; + } + } + + function getLastError() { + return $this->mLastError; + } + + /** 'if' built-in function */ + function bi_if( $condition, $trueBlock, $falseBlock = null ) { + if ( is_null( $condition ) ) { + $this->error( "Missing condition in if" ); + return ''; + } + + if ( $condition->getText() != '' ) { + return new CBTValue( $trueBlock->getText(), + array_merge( $condition->getDeps(), $trueBlock->getDeps() ), + $trueBlock->mIsTemplate ); + } else { + if ( !is_null( $falseBlock ) ) { + return new CBTValue( $falseBlock->getText(), + array_merge( $condition->getDeps(), $falseBlock->getDeps() ), + $falseBlock->mIsTemplate ); + } else { + return new CBTValue( '', $condition->getDeps() ); + } + } + } + + /** 'true' built-in function */ + function bi_true() { + return "true"; + } + + /** left brace built-in */ + function bi_lbrace() { + return '{'; + } + + /** right brace built-in */ + function bi_rbrace() { + return '}'; + } + + /** + * escape built-in. + * Escape text for inclusion in an HTML attribute + */ + function bi_escape( $val ) { + return new CBTValue( htmlspecialchars( $val->getText() ), $val->getDeps() ); + } +} +?> diff --git a/includes/cbt/README b/includes/cbt/README new file mode 100644 index 00000000..cffcef2f --- /dev/null +++ b/includes/cbt/README @@ -0,0 +1,108 @@ +Overview +-------- + +CBT (callback-based templates) is an experimental system for improving skin +rendering time in MediaWiki and similar applications. The fundamental concept is +a template language which contains tags which pull text from PHP callbacks. +These PHP callbacks do not simply return text, they also return a description of +the dependencies -- the global data upon which the returned text depends. This +allows a compiler to produce a template optimised for a certain context. 
For +example, a user-dependent template can be produced, with the username replaced +by static text, as well as all user preference dependent text. + +This was an experimental project to prove the concept -- to explore possible +efficiency gains and techniques. TemplateProcessor was the first element of this +experiment. It is a class written in PHP which parses a template, and produces +either an optimised template with dependencies removed, or the output text +itself. I found that even with a heavily optimised template, this processor was +not fast enough to match the speed of the original MonoBook. + +To improve the efficiency, I wrote TemplateCompiler, which takes a template, +preferably pre-optimised by TemplateProcessor, and generates PHP code from it. +The generated code is a single expression, concatenating static text and +callback results. This approach turned out to be very efficient, making +significant time savings compared to the original MonoBook. + +Despite this success, the code has been shelved for the time being. There were +a number of unresolved implementation problems, and I felt that there were more +pressing priorities for MediaWiki development than solving them and bringing +this module to completion. I also believe that more research is needed into +other possible template architectures. There is nothing fundamentally wrong with +the CBT concept, and I would encourage others to continue its development. + +The problems I saw were: + +* Extensibility. Can non-Wikimedia installations easily extend and modify CBT + skins? Patching seems to be necessary, is this acceptable? MediaWiki + extensions are another problem. Unless the interfaces allow them to return + dependencies, any hooks will have to be marked dynamic and thus inefficient. + +* Cache invalidation. This is a simple implementation issue, although it would + require extensive modification to the MediaWiki core. + +* Syntax. 
The syntax is minimalistic and easy to parse, but can be quite ugly. + Will generations of MediaWiki users curse my name? + +* Security. The code produced by TemplateCompiler is best stored in memcached + and executed with eval(). This allows anyone with access to the memcached port + to run code as the apache user. + + +Template syntax +--------------- + +There are two modes: text mode and function mode. The brace characters "{" +and "}" are the only reserved characters. Either one of them will switch from +text mode to function mode wherever they appear, no exceptions. + +In text mode, all characters are passed through to the output. In function +mode, text is split into tokens, delimited either by whitespace or by +matching pairs of braces. The first token is taken to be a function name. The +other tokens are first processed in function mode themselves, then they are +passed to the named function as parameters. The return value of the function +is passed through to the output. + +Example: + {escape {"hello"}} + +First brace switches to function mode. The function name is escape, the first +and only parameter is {"hello"}. This parameter is executed. The braces around +the parameter cause the parser to switch to text mode, thus the string "hello", +including the quotes, is passed back and used as an argument to the escape +function. + +Example: + {if title {<h1>{title}</h1>}} + +The function name is "if". The first parameter is the result of calling the +function "title". The second parameter is a level 1 HTML heading containing +the result of the function "title". "if" is a built-in function which will +return the second parameter only if the first is non-blank, so the effect of +this is to return a heading element only if a title exists. + +As a shortcut for generation of HTML attributes, if a function mode segment is +surrounded by double quotes, quote characters in the return value will be +escaped. 
This only applies if the quote character immediately precedes the +opening brace, and immediately follows the closing brace, with no whitespace. + +User callback functions are defined by passing a function object to the +template processor. Function names appearing in the text are first checked +against built-in function names, then against the method names in the function +object. The function object forms a sandbox for execution of the template, so +security-conscious users may wish to avoid including functions that allow +arbitrary filesystem access or code execution. + +The callback function will receive its parameters as strings. If the +result of the function depends only on the arguments, and certain things +understood to be "static", such as the source code, then the callback function +should return a string. If the result depends on other things, then the function +should call cbt_value() to get a return value: + + return cbt_value( $text, $deps ); + +where $deps is an array of string tokens, each one naming a dependency. As a +shortcut, if there is only one dependency, $deps may be a string. + + +--------------------- +Tim Starling 2006 |