` blocks. # $text = preg_replace_callback('{ (?:\n\n|\A\n?) ( # $1 = the code block -- one or more lines, starting with a space/tab (?> [ ]{'.$this->tab_width.'} # Lines must start with a tab or a tab-width of spaces .*\n+ )+ ) ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc }xm', array(&$this, '_doCodeBlocks_callback'), $text); return $text; } function _doCodeBlocks_callback($matches) { $codeblock = $matches[1]; $codeblock = $this->outdent($codeblock); $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES); # trim leading newlines and trailing newlines $codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock); $codeblock = "$codeblock\n"; return "\n\n".$this->hashBlock($codeblock)."\n\n"; } function makeCodeSpan($code) { # # Create a code span markup for $code. Called from handleSpanToken. # $code = htmlspecialchars(trim($code), ENT_NOQUOTES); return $this->hashPart("$code"); } var $em_relist = array( '' => '(?:(? '(?<=\S)(? '(?<=\S)(? '(?:(? '(?<=\S)(? '(?<=\S)(? '(?:(? '(?<=\S)(? '(?<=\S)(?em_relist as $em => $em_re) { foreach ($this->strong_relist as $strong => $strong_re) { # Construct list of allowed token expressions. $token_relist = array(); if (isset($this->em_strong_relist["$em$strong"])) { $token_relist[] = $this->em_strong_relist["$em$strong"]; } $token_relist[] = $em_re; $token_relist[] = $strong_re; # Construct master expression from list. $token_re = '{('. implode('|', $token_relist) .')}'; $this->em_strong_prepared_relist["$em$strong"] = $token_re; } } } function doItalicsAndBold($text) { $token_stack = array(''); $text_stack = array(''); $em = ''; $strong = ''; $tree_char_em = false; while (1) { # # Get prepared regular expression for seraching emphasis tokens # in current context. # $token_re = $this->em_strong_prepared_relist["$em$strong"]; # # Each loop iteration seach for the next emphasis token. # Each token is then passed to handleSpanToken. # $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE); $text_stack[0] .= $parts[0]; $token =& $parts[1]; $text =& $parts[2]; if (empty($token)) { # Reached end of text span: empty stack without emitting. # any more emphasis. while ($token_stack[0]) { $text_stack[1] .= array_shift($token_stack); $text_stack[0] .= array_shift($text_stack); } break; } $token_len = strlen($token); if ($tree_char_em) { # Reached closing marker while inside a three-char emphasis. if ($token_len == 3) { # Three-char closing marker, close em and strong. array_shift($token_stack); $span = array_shift($text_stack); $span = $this->runSpanGamut($span); $span = "$span"; $text_stack[0] .= $this->hashPart($span); $em = ''; $strong = ''; } else { # Other closing marker: close one em or strong and # change current token state to match the other $token_stack[0] = str_repeat($token{0}, 3-$token_len); $tag = $token_len == 2 ? "strong" : "em"; $span = $text_stack[0]; $span = $this->runSpanGamut($span); $span = "<$tag>$span"; $text_stack[0] = $this->hashPart($span); $$tag = ''; # $$tag stands for $em or $strong } $tree_char_em = false; } else if ($token_len == 3) { if ($em) { # Reached closing marker for both em and strong. # Closing strong marker: for ($i = 0; $i < 2; ++$i) { $shifted_token = array_shift($token_stack); $tag = strlen($shifted_token) == 2 ? "strong" : "em"; $span = array_shift($text_stack); $span = $this->runSpanGamut($span); $span = "<$tag>$span"; $text_stack[0] .= $this->hashPart($span); $$tag = ''; # $$tag stands for $em or $strong } } else { # Reached opening three-char emphasis marker. Push on token # stack; will be handled by the special condition above. $em = $token{0}; $strong = "$em$em"; array_unshift($token_stack, $token); array_unshift($text_stack, ''); $tree_char_em = true; } } else if ($token_len == 2) { if ($strong) { # Unwind any dangling emphasis marker: if (strlen($token_stack[0]) == 1) { $text_stack[1] .= array_shift($token_stack); $text_stack[0] .= array_shift($text_stack); } # Closing strong marker: array_shift($token_stack); $span = array_shift($text_stack); $span = $this->runSpanGamut($span); $span = "$span"; $text_stack[0] .= $this->hashPart($span); $strong = ''; } else { array_unshift($token_stack, $token); array_unshift($text_stack, ''); $strong = $token; } } else { # Here $token_len == 1 if ($em) { if (strlen($token_stack[0]) == 1) { # Closing emphasis marker: array_shift($token_stack); $span = array_shift($text_stack); $span = $this->runSpanGamut($span); $span = "$span"; $text_stack[0] .= $this->hashPart($span); $em = ''; } else { $text_stack[0] .= $token; } } else { array_unshift($token_stack, $token); array_unshift($text_stack, ''); $em = $token; } } } return $text_stack[0]; } function doBlockQuotes($text) { $text = preg_replace_callback('/ ( # Wrap whole match in $1 (?> ^[ ]*>[ ]? # ">" at the start of a line .+\n # rest of the first line (.+\n)* # subsequent consecutive lines \n* # blanks )+ ) /xm', array(&$this, '_doBlockQuotes_callback'), $text); return $text; } function _doBlockQuotes_callback($matches) { $bq = $matches[1]; # trim one level of quoting - trim whitespace-only lines $bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq); $bq = $this->runBlockGamut($bq); # recurse $bq = preg_replace('/^/m', " ", $bq); # These leading spaces cause problem with content, # so we need to fix that: $bq = preg_replace_callback('{(\s*.+?)}sx', array(&$this, '_DoBlockQuotes_callback2'), $bq); return "\n". $this->hashBlock("\n$bq\n")."\n\n"; } function _doBlockQuotes_callback2($matches) { $pre = $matches[1]; $pre = preg_replace('/^ /m', '', $pre); return $pre; } function formParagraphs($text) { # # Params: # $text - string to process with html tags # # Strip leading and trailing lines: $text = preg_replace('/\A\n+|\n+\z/', '', $text); $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY); # # Wrap tags and unhashify HTML blocks # foreach ($grafs as $key => $value) { if (!preg_match('/^B\x1A[0-9]+B$/', $value)) { # Is a paragraph. $value = $this->runSpanGamut($value); $value = preg_replace('/^([ ]*)/', " ", $value); $value .= ""; $grafs[$key] = $this->unhash($value); } else { # Is a block. # Modify elements of @grafs in-place... $graf = $value; $block = $this->html_hashes[$graf]; $graf = $block; // if (preg_match('{ // \A // ( # $1 = tag // ]* // \b // markdown\s*=\s* ([\'"]) # $2 = attr quote char // 1 // \2 // [^>]* // > // ) // ( # $3 = contents // .* // ) // () # $4 = closing tag // \z // }xs', $block, $matches)) // { // list(, $div_open, , $div_content, $div_close) = $matches; // // # We cannot call Markdown(), because that resets the hash; // # that initialization code should be pulled into its own sub, though. // $div_content = $this->hashHTMLBlocks($div_content); // // # Run document gamut methods on the content. // foreach ($this->document_gamut as $method => $priority) { // $div_content = $this->$method($div_content); // } // // $div_open = preg_replace( // '{\smarkdown\s*=\s*([\'"]).+?\1}', '', $div_open); // // $graf = $div_open . "\n" . $div_content . "\n" . $div_close; // } $grafs[$key] = $graf; } } return implode("\n\n", $grafs); } function encodeAttribute($text) { # # Encode text for a double-quoted HTML attribute. This function # is *not* suitable for attributes enclosed in single quotes. # $text = $this->encodeAmpsAndAngles($text); $text = str_replace('"', '"', $text); return $text; } function encodeAmpsAndAngles($text) { # # Smart processing for ampersands and angle brackets that need to # be encoded. Valid character entities are left alone unless the # no-entities mode is set. # if ($this->no_entities) { $text = str_replace('&', '&', $text); } else { # Ampersand-encoding based entirely on Nat Irons's Amputator # MT plugin: $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/', '&', $text);; } # Encode remaining <'s $text = str_replace('<', '<', $text); return $text; } function doAutoLinks($text) { $text = preg_replace_callback('{<((https?|ftp|dict):[^\'">\s]+)>}i', array(&$this, '_doAutoLinks_url_callback'), $text); # Email addresses: $text = preg_replace_callback('{ < (?:mailto:)? ( [-.\w\x80-\xFF]+ \@ [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+ ) > }xi', array(&$this, '_doAutoLinks_email_callback'), $text); return $text; } function _doAutoLinks_url_callback($matches) { $url = $this->encodeAttribute($matches[1]); $link = "$url"; return $this->hashPart($link); } function _doAutoLinks_email_callback($matches) { $address = $matches[1]; $link = $this->encodeEmailAddress($address); return $this->hashPart($link); } function encodeEmailAddress($addr) { # # Input: an email address, e.g. "foo@example.com" # # Output: the email address as a mailto link, with each character # of the address encoded as either a decimal or hex entity, in # the hopes of foiling most address harvesting spam bots. E.g.: # # foo@exampl # e.com # # Based by a filter by Matthew Wickline, posted to BBEdit-Talk. # With some optimizations by Milian Wolff. # $addr = "mailto:" . $addr; $chars = preg_split('/(? $char) { $ord = ord($char); # Ignore non-ascii chars. if ($ord < 128) { $r = ($seed * (1 + $key)) % 100; # Pseudo-random function. # roughly 10% raw, 45% hex, 45% dec # '@' *must* be encoded. I insist. if ($r > 90 && $char != '@') /* do nothing */; else if ($r < 45) $chars[$key] = '&#x'.dechex($ord).';'; else $chars[$key] = '&#'.$ord.';'; } } $addr = implode('', $chars); $text = implode('', array_slice($chars, 7)); # text without `mailto:` $addr = "$text"; return $addr; } function parseSpan($str) { # # Take the string $str and parse it into tokens, hashing embeded HTML, # escaped characters and handling code spans. # $output = ''; $span_re = '{ ( \\\\'.$this->escape_chars_re.' | (?no_markup ? '' : ' | # comment | <\?.*?\?> | <%.*?%> # processing instruction | <[/!$]?[-a-zA-Z0-9:]+ # regular tags (?> \s (?>[^"\'>]+|"[^"]*"|\'[^\']*\')* )? > ').' ) }xs'; while (1) { # # Each loop iteration seach for either the next tag, the next # openning code span marker, or the next escaped character. # Each token is then passed to handleSpanToken. # $parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE); # Create token from text preceding tag. if ($parts[0] != "") { $output .= $parts[0]; } # Check if we reach the end. if (isset($parts[1])) { $output .= $this->handleSpanToken($parts[1], $parts[2]); $str = $parts[2]; } else { break; } } return $output; } function handleSpanToken($token, &$str) { # # Handle $token provided by parseSpan by determining its nature and # returning the corresponding value that should replace it. # switch ($token{0}) { case "\\": return $this->hashPart("&#". ord($token{1}). ";"); case "`": # Search for end marker in remaining text. if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm', $str, $matches)) { $str = $matches[2]; $codespan = $this->makeCodeSpan($matches[1]); return $this->hashPart($codespan); } return $token; // return as text since no ending marker found. default: return $this->hashPart($token); } } function outdent($text) { # # Remove one level of line-leading tabs or spaces # return preg_replace('/^(\t|[ ]{1,'.$this->tab_width.'})/m', '', $text); } # String length function for detab. `_initDetab` will create a function to # hanlde UTF-8 if the default function does not exist. var $utf8_strlen = 'mb_strlen'; function detab($text) { # # Replace tabs with the appropriate amount of space. # # For each line we separate the line in blocks delemited by # tab characters. Then we reconstruct every line by adding the # appropriate number of space between each blocks. $text = preg_replace_callback('/^.*\t.*$/m', array(&$this, '_detab_callback'), $text); return $text; } function _detab_callback($matches) { $line = $matches[0]; $strlen = $this->utf8_strlen; # strlen function for UTF-8. # Split in blocks. $blocks = explode("\t", $line); # Add each blocks to the line. $line = $blocks[0]; unset($blocks[0]); # Do not add first block twice. foreach ($blocks as $block) { # Calculate amount of space, insert spaces, insert block. $amount = $this->tab_width - $strlen($line, 'UTF-8') % $this->tab_width; $line .= str_repeat(" ", $amount) . $block; } return $line; } function _initDetab() { # # Check for the availability of the function in the `utf8_strlen` property # (initially `mb_strlen`). If the function is not available, create a # function that will loosely count the number of UTF-8 characters with a # regular expression. # if (function_exists($this->utf8_strlen)) return; $this->utf8_strlen = create_function('$text', 'return preg_match_all( "/[\\\\x00-\\\\xBF]|[\\\\xC0-\\\\xFF][\\\\x80-\\\\xBF]*/", $text, $m);'); } function unhash($text) { # # Swap back in all the tags hashed by _HashHTMLBlocks. # return preg_replace_callback('/(.)\x1A[0-9]+\1/', array(&$this, '_unhash_callback'), $text); } function _unhash_callback($matches) { return $this->html_hashes[$matches[0]]; } } /* PHP Markdown ============ Description ----------- This is a PHP translation of the original Markdown formatter written in Perl by John Gruber. Markdown is a text-to-HTML filter; it translates an easy-to-read / easy-to-write structured text format into HTML. Markdown's text format is most similar to that of plain text email, and supports features such as headers, *emphasis*, code blocks, blockquotes, and links. Markdown's syntax is designed not as a generic markup language, but specifically to serve as a front-end to (X)HTML. You can use span-level HTML tags anywhere in a Markdown document, and you can use block level HTML tags (like and as well). For more information about Markdown's syntax, see: Bugs ---- To file bug reports please send email to: Please include with your report: (1) the example input; (2) the output you expected; (3) the output Markdown actually produced. Version History --------------- See the readme file for detailed release notes for this version. Copyright and License --------------------- PHP Markdown Copyright (c) 2004-2008 Michel Fortin All rights reserved. Based on Markdown Copyright (c) 2003-2006 John Gruber All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name "Markdown" nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. This software is provided by the copyright holders and contributors "as is" and any express or implied warranties, including, but not limited to, the implied warranties of merchantability and fitness for a particular purpose are disclaimed. In no event shall the copyright owner or contributors be liable for any direct, indirect, incidental, special, exemplary, or consequential damages (including, but not limited to, procurement of substitute goods or services; loss of use, data, or profits; or business interruption) however caused and on any theory of liability, whether in contract, strict liability, or tort (including negligence or otherwise) arising in any way out of the use of this software, even if advised of the possibility of such damage. */ ?>