From 8149be213f1c8f52b0dbe6c213f6073af57fa954 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 27 May 2013 21:39:43 +0200 Subject: filters: import more modern scripts Signed-off-by: Jason A. Donenfeld --- filters/about-formatting.sh | 27 + filters/html-converters/man2html | 5 + filters/html-converters/md2html | 2 + filters/html-converters/resources/markdown.pl | 1731 ++++++++++++++++++++ filters/html-converters/resources/rst-template.txt | 4 + filters/html-converters/rst2html | 2 + filters/html-converters/txt2html | 4 + filters/syntax-highlighting.py | 38 + 8 files changed, 1813 insertions(+) create mode 100755 filters/about-formatting.sh create mode 100755 filters/html-converters/man2html create mode 100755 filters/html-converters/md2html create mode 100755 filters/html-converters/resources/markdown.pl create mode 100644 filters/html-converters/resources/rst-template.txt create mode 100755 filters/html-converters/rst2html create mode 100755 filters/html-converters/txt2html create mode 100755 filters/syntax-highlighting.py (limited to 'filters') diff --git a/filters/about-formatting.sh b/filters/about-formatting.sh new file mode 100755 index 0000000..313a4e6 --- /dev/null +++ b/filters/about-formatting.sh @@ -0,0 +1,27 @@ +#!/bin/sh + +# This may be used with the about-filter or repo.about-filter setting in cgitrc. +# It passes formatting of about pages to differing programs, depending on the usage. + +# Markdown support requires perl. +# RestructuredText support requires python and docutils. +# Man page support requires groff. + +# The following environment variables can be used to retrieve the configuration +# of the repository for which this script is called: +# CGIT_REPO_URL ( = repo.url setting ) +# CGIT_REPO_NAME ( = repo.name setting ) +# CGIT_REPO_PATH ( = repo.path setting ) +# CGIT_REPO_OWNER ( = repo.owner setting ) +# CGIT_REPO_DEFBRANCH ( = repo.defbranch setting ) +# CGIT_REPO_SECTION ( = section setting ) +# CGIT_REPO_CLONE_URL ( = repo.clone-url setting ) + +cd "$(dirname $0)/html-converters/" +case "$(tr '[:upper:]' '[:lower:]' <<<"$1")" in + *.md|*.mkd) exec ./md2html; ;; + *.rst) exec ./rst2html; ;; + *.[1-9]) exec ./man2html; ;; + *.htm|*.html) exec cat; ;; + *.txt|*) exec ./txt2html; ;; +esac diff --git a/filters/html-converters/man2html b/filters/html-converters/man2html new file mode 100755 index 0000000..1b28437 --- /dev/null +++ b/filters/html-converters/man2html @@ -0,0 +1,5 @@ +#!/bin/sh +echo "

" +groff -mandoc -T html -P -r -P -l | egrep -v '(||||||||" + diff --git a/filters/html-converters/md2html b/filters/html-converters/md2html new file mode 100755 index 0000000..5cab749 --- /dev/null +++ b/filters/html-converters/md2html @@ -0,0 +1,2 @@ +#!/bin/sh +exec "$(dirname "$0")/resources/markdown.pl" diff --git a/filters/html-converters/resources/markdown.pl b/filters/html-converters/resources/markdown.pl new file mode 100755 index 0000000..abec173 --- /dev/null +++ b/filters/html-converters/resources/markdown.pl @@ -0,0 +1,1731 @@ +#!/usr/bin/perl + +# +# Markdown -- A text-to-HTML conversion tool for web writers +# +# Copyright (c) 2004 John Gruber +# +# + + +package Markdown; +require 5.006_000; +use strict; +use warnings; + +use Digest::MD5 qw(md5_hex); +use vars qw($VERSION); +$VERSION = '1.0.1'; +# Tue 14 Dec 2004 + +## Disabled; causes problems under Perl 5.6.1: +use utf8; +binmode( STDOUT, ":utf8" ); # c.f.: http://acis.openlib.org/dev/perl-unicode-struggle.html + + +# +# Global default settings: +# +my $g_empty_element_suffix = " />"; # Change to ">" for HTML output +my $g_tab_width = 4; + + +# +# Globals: +# + +# Regex to match balanced [brackets]. See Friedl's +# "Mastering Regular Expressions", 2nd Ed., pp. 328-331. +my $g_nested_brackets; +$g_nested_brackets = qr{ + (?> # Atomic matching + [^\[\]]+ # Anything other than brackets + | + \[ + (??{ $g_nested_brackets }) # Recursive set of nested brackets + \] + )* +}x; + + +# Table of hash values for escaped characters: +my %g_escape_table; +foreach my $char (split //, '\\`*_{}[]()>#+-.!') { + $g_escape_table{$char} = md5_hex($char); +} + + +# Global hashes, used by various utility routines +my %g_urls; +my %g_titles; +my %g_html_blocks; + +# Used to track when we're inside an ordered or unordered list +# (see _ProcessListItems() for details): +my $g_list_level = 0; + + +#### Blosxom plug-in interface ########################################## + +# Set $g_blosxom_use_meta to 1 to use Blosxom's meta plug-in to determine +# which posts Markdown should process, using a "meta-markup: markdown" +# header. If it's set to 0 (the default), Markdown will process all +# entries. +my $g_blosxom_use_meta = 0; + +sub start { 1; } +sub story { + my($pkg, $path, $filename, $story_ref, $title_ref, $body_ref) = @_; + + if ( (! $g_blosxom_use_meta) or + (defined($meta::markup) and ($meta::markup =~ /^\s*markdown\s*$/i)) + ){ + $$body_ref = Markdown($$body_ref); + } + 1; +} + + +#### Movable Type plug-in interface ##################################### +eval {require MT}; # Test to see if we're running in MT. +unless ($@) { + require MT; + import MT; + require MT::Template::Context; + import MT::Template::Context; + + eval {require MT::Plugin}; # Test to see if we're running >= MT 3.0. + unless ($@) { + require MT::Plugin; + import MT::Plugin; + my $plugin = new MT::Plugin({ + name => "Markdown", + description => "A plain-text-to-HTML formatting plugin. (Version: $VERSION)", + doc_link => 'http://daringfireball.net/projects/markdown/' + }); + MT->add_plugin( $plugin ); + } + + MT::Template::Context->add_container_tag(MarkdownOptions => sub { + my $ctx = shift; + my $args = shift; + my $builder = $ctx->stash('builder'); + my $tokens = $ctx->stash('tokens'); + + if (defined ($args->{'output'}) ) { + $ctx->stash('markdown_output', lc $args->{'output'}); + } + + defined (my $str = $builder->build($ctx, $tokens) ) + or return $ctx->error($builder->errstr); + $str; # return value + }); + + MT->add_text_filter('markdown' => { + label => 'Markdown', + docs => 'http://daringfireball.net/projects/markdown/', + on_format => sub { + my $text = shift; + my $ctx = shift; + my $raw = 0; + if (defined $ctx) { + my $output = $ctx->stash('markdown_output'); + if (defined $output && $output =~ m/^html/i) { + $g_empty_element_suffix = ">"; + $ctx->stash('markdown_output', ''); + } + elsif (defined $output && $output eq 'raw') { + $raw = 1; + $ctx->stash('markdown_output', ''); + } + else { + $raw = 0; + $g_empty_element_suffix = " />"; + } + } + $text = $raw ? $text : Markdown($text); + $text; + }, + }); + + # If SmartyPants is loaded, add a combo Markdown/SmartyPants text filter: + my $smartypants; + + { + no warnings "once"; + $smartypants = $MT::Template::Context::Global_filters{'smarty_pants'}; + } + + if ($smartypants) { + MT->add_text_filter('markdown_with_smartypants' => { + label => 'Markdown With SmartyPants', + docs => 'http://daringfireball.net/projects/markdown/', + on_format => sub { + my $text = shift; + my $ctx = shift; + if (defined $ctx) { + my $output = $ctx->stash('markdown_output'); + if (defined $output && $output eq 'html') { + $g_empty_element_suffix = ">"; + } + else { + $g_empty_element_suffix = " />"; + } + } + $text = Markdown($text); + $text = $smartypants->($text, '1'); + }, + }); + } +} +else { +#### BBEdit/command-line text filter interface ########################## +# Needs to be hidden from MT (and Blosxom when running in static mode). + + # We're only using $blosxom::version once; tell Perl not to warn us: + no warnings 'once'; + unless ( defined($blosxom::version) ) { + use warnings; + + #### Check for command-line switches: ################# + my %cli_opts; + use Getopt::Long; + Getopt::Long::Configure('pass_through'); + GetOptions(\%cli_opts, + 'version', + 'shortversion', + 'html4tags', + ); + if ($cli_opts{'version'}) { # Version info + print "\nThis is Markdown, version $VERSION.\n"; + print "Copyright 2004 John Gruber\n"; + print "http://daringfireball.net/projects/markdown/\n\n"; + exit 0; + } + if ($cli_opts{'shortversion'}) { # Just the version number string. + print $VERSION; + exit 0; + } + if ($cli_opts{'html4tags'}) { # Use HTML tag style instead of XHTML + $g_empty_element_suffix = ">"; + } + + + #### Process incoming text: ########################### + my $text; + { + local $/; # Slurp the whole file + $text = <>; + } + print <<'EOT'; + +EOT + print "

"; + print Markdown($text); + print "

"; + } +} + + + +sub Markdown { +# +# Main function. The order in which other subs are called here is +# essential. Link and image substitutions need to happen before +# _EscapeSpecialChars(), so that any *'s or _'s in the +# and tags get encoded. +# + my $text = shift; + + # Clear the global hashes. If we don't clear these, you get conflicts + # from other articles when generating a page which contains more than + # one article (e.g. an index page that shows the N most recent + # articles): + %g_urls = (); + %g_titles = (); + %g_html_blocks = (); + + + # Standardize line endings: + $text =~ s{\r\n}{\n}g; # DOS to Unix + $text =~ s{\r}{\n}g; # Mac to Unix + + # Make sure $text ends with a couple of newlines: + $text .= "\n\n"; + + # Convert all tabs to spaces. + $text = _Detab($text); + + # Strip any lines consisting only of spaces and tabs. + # This makes subsequent regexen easier to write, because we can + # match consecutive blank lines with /\n+/ instead of something + # contorted like /[ \t]*\n+/ . + $text =~ s/^[ \t]+$//mg; + + # Turn block-level HTML blocks into hash entries + $text = _HashHTMLBlocks($text); + + # Strip link definitions, store in hashes. + $text = _StripLinkDefinitions($text); + + $text = _RunBlockGamut($text); + + $text = _UnescapeSpecialChars($text); + + return $text . "\n"; +} + + +sub _StripLinkDefinitions { +# +# Strips link definitions from text, stores the URLs and titles in +# hash references. +# + my $text = shift; + my $less_than_tab = $g_tab_width - 1; + + # Link defs are in the form: ^[id]: url "optional title" + while ($text =~ s{ + ^[ ]{0,$less_than_tab}\[(.+)\]: # id = $1 + [ \t]* + \n? # maybe *one* newline + [ \t]* + ? # url = $2 + [ \t]* + \n? # maybe one newline + [ \t]* + (?: + (?<=\s) # lookbehind for whitespace + ["(] + (.+?) # title = $3 + [")] + [ \t]* + )? # title is optional + (?:\n+|\Z) + } + {}mx) { + $g_urls{lc $1} = _EncodeAmpsAndAngles( $2 ); # Link IDs are case-insensitive + if ($3) { + $g_titles{lc $1} = $3; + $g_titles{lc $1} =~ s/"/"/g; + } + } + + return $text; +} + + +sub _HashHTMLBlocks { + my $text = shift; + my $less_than_tab = $g_tab_width - 1; + + # Hashify HTML blocks: + # We only want to do this for block-level HTML tags, such as headers, + # lists, and tables. That's because we still want to wrap

s around + # "paragraphs" that are wrapped in non-block-level tags, such as anchors, + # phrase emphasis, and spans. The list of tags we're looking for is + # hard-coded: + my $block_tags_a = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del/; + my $block_tags_b = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math/; + + # First, look for nested blocks, e.g.: + #

+ #

+ # tags for inner block must be indented. + #

+ #

+ # + # The outermost tags must start at the left margin for this to match, and + # the inner nested divs must be indented. + # We need to do this before the next, more liberal match, because the next + # match will start at the first `

` and stop at the first `

`. + $text =~ s{ + ( # save in $1 + ^ # start of line (with /m) + <($block_tags_a) # start tag = $2 + \b # word break + (.*\n)*? # any number of lines, minimally matching + # the matching end tag + [ \t]* # trailing spaces/tabs + (?=\n+|\Z) # followed by a newline or end of document + ) + }{ + my $key = md5_hex($1); + $g_html_blocks{$key} = $1; + "\n\n" . $key . "\n\n"; + }egmx; + + + # + # Now match more liberally, simply from `\n` to `\n` + # + $text =~ s{ + ( # save in $1 + ^ # start of line (with /m) + <($block_tags_b) # start tag = $2 + \b # word break + (.*\n)*? # any number of lines, minimally matching + .* # the matching end tag + [ \t]* # trailing spaces/tabs + (?=\n+|\Z) # followed by a newline or end of document + ) + }{ + my $key = md5_hex($1); + $g_html_blocks{$key} = $1; + "\n\n" . $key . "\n\n"; + }egmx; + # Special case just for

. It was easier to make a special case than + # to make the other regex more complicated. + $text =~ s{ + (?: + (?<=\n\n) # Starting after a blank line + | # or + \A\n? # the beginning of the doc + ) + ( # save in $1 + [ ]{0,$less_than_tab} + <(hr) # start tag = $2 + \b # word break + ([^<>])*? # + /?> # the matching end tag + [ \t]* + (?=\n{2,}|\Z) # followed by a blank line or end of document + ) + }{ + my $key = md5_hex($1); + $g_html_blocks{$key} = $1; + "\n\n" . $key . "\n\n"; + }egx; + + # Special case for standalone HTML comments: + $text =~ s{ + (?: + (?<=\n\n) # Starting after a blank line + | # or + \A\n? # the beginning of the doc + ) + ( # save in $1 + [ ]{0,$less_than_tab} + (?s: + + ) + [ \t]* + (?=\n{2,}|\Z) # followed by a blank line or end of document + ) + }{ + my $key = md5_hex($1); + $g_html_blocks{$key} = $1; + "\n\n" . $key . "\n\n"; + }egx; + + + return $text; +} + + +sub _RunBlockGamut { +# +# These are all the transformations that form block-level +# tags like paragraphs, headers, and list items. +# + my $text = shift; + + $text = _DoHeaders($text); + + # Do Horizontal Rules: + $text =~ s{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}{\n tags around block-level tags. + $text = _HashHTMLBlocks($text); + + $text = _FormParagraphs($text); + + return $text; +} + + +sub _RunSpanGamut { +# +# These are all the transformations that occur *within* block-level +# tags like paragraphs, headers, and list items. +# + my $text = shift; + + $text = _DoCodeSpans($text); + + $text = _EscapeSpecialChars($text); + + # Process anchor and image tags. Images must come first, + # because ![foo][f] looks like an anchor. + $text = _DoImages($text); + $text = _DoAnchors($text); + + # Make links out of things like `` + # Must come after _DoAnchors(), because you can use < and > + # delimiters in inline links like [this](). + $text = _DoAutoLinks($text); + + $text = _EncodeAmpsAndAngles($text); + + $text = _DoItalicsAndBold($text); + + # Do hard breaks: + $text =~ s/ {2,}\n/ or

 tags.
+# 	my $tags_to_skip = qr!<(/?)(?:pre|code|kbd|script|math)[\s>]!;
+
+	foreach my $cur_token (@$tokens) {
+		if ($cur_token->[0] eq "tag") {
+			# Within tags, encode * and _ so they don't conflict
+			# with their use in Markdown for italics and strong.
+			# We're replacing each such character with its
+			# corresponding MD5 checksum value; this is likely
+			# overkill, but it should prevent us from colliding
+			# with the escape values by accident.
+			$cur_token->[1] =~  s! \* !$g_escape_table{'*'}!gx;
+			$cur_token->[1] =~  s! _  !$g_escape_table{'_'}!gx;
+			$text .= $cur_token->[1];
+		} else {
+			my $t = $cur_token->[1];
+			$t = _EncodeBackslashEscapes($t);
+			$text .= $t;
+		}
+	}
+	return $text;
+}
+
+
+sub _DoAnchors {
+#
+# Turn Markdown link shortcuts into XHTML

 tags.
+#
+	my $text = shift;
+
+	#
+	# First, handle reference-style links: [link text] [id]
+	#
+	$text =~ s{
+		(					# wrap whole match in $1
+		  \[
+		    ($g_nested_brackets)	# link text = $2
+		  \]
+
+		  [ ]?				# one optional space
+		  (?:\n[ ]*)?		# one optional newline followed by spaces
+
+		  \[
+		    (.*?)		# id = $3
+		  \]
+		)
+	}{
+		my $result;
+		my $whole_match = $1;
+		my $link_text   = $2;
+		my $link_id     = lc $3;
+
+		if ($link_id eq "") {
+			$link_id = lc $link_text;     # for shortcut links like [this][].
+		}
+
+		if (defined $g_urls{$link_id}) {
+			my $url = $g_urls{$link_id};
+			$url =~ s! \* !$g_escape_table{'*'}!gx;		# We've got to encode these to avoid
+			$url =~ s!  _ !$g_escape_table{'_'}!gx;		# conflicting with italics/bold.
+			$result = "?	# href = $3
+		  	[ \t]*
+			(			# $4
+			  (['"])	# quote char = $5
+			  (.*?)		# Title = $6
+			  \5		# matching quote
+			)?			# title is optional
+		  \)
+		)
+	}{
+		my $result;
+		my $whole_match = $1;
+		my $link_text   = $2;
+		my $url	  		= $3;
+		my $title		= $6;
+
+		$url =~ s! \* !$g_escape_table{'*'}!gx;		# We've got to encode these to avoid
+		$url =~ s!  _ !$g_escape_table{'_'}!gx;		# conflicting with italics/bold.
+		$result = " tags.
+#
+	my $text = shift;
+
+	#
+	# First, handle reference-style labeled images: ![alt text][id]
+	#
+	$text =~ s{
+		(				# wrap whole match in $1
+		  !\[
+		    (.*?)		# alt text = $2
+		  \]
+
+		  [ ]?				# one optional space
+		  (?:\n[ ]*)?		# one optional newline followed by spaces
+
+		  \[
+		    (.*?)		# id = $3
+		  \]
+
+		)
+	}{
+		my $result;
+		my $whole_match = $1;
+		my $alt_text    = $2;
+		my $link_id     = lc $3;
+
+		if ($link_id eq "") {
+			$link_id = lc $alt_text;     # for shortcut links like ![this][].
+		}
+
+		$alt_text =~ s/"/"/g;
+		if (defined $g_urls{$link_id}) {
+			my $url = $g_urls{$link_id};
+			$url =~ s! \* !$g_escape_table{'*'}!gx;		# We've got to encode these to avoid
+			$url =~ s!  _ !$g_escape_table{'_'}!gx;		# conflicting with italics/bold.
+			$result = "?	# src url = $3
+		  	[ \t]*
+			(			# $4
+			  (['"])	# quote char = $5
+			  (.*?)		# title = $6
+			  \5		# matching quote
+			  [ \t]*
+			)?			# title is optional
+		  \)
+		)
+	}{
+		my $result;
+		my $whole_match = $1;
+		my $alt_text    = $2;
+		my $url	  		= $3;
+		my $title		= '';
+		if (defined($6)) {
+			$title		= $6;
+		}
+
+		$alt_text =~ s/"/"/g;
+		$title    =~ s/"/"/g;
+		$url =~ s! \* !$g_escape_table{'*'}!gx;		# We've got to encode these to avoid
+		$url =~ s!  _ !$g_escape_table{'_'}!gx;		# conflicting with italics/bold.
+		$result = ""  .  _RunSpanGamut($1)  .  "\n\n";
+	}egmx;
+
+	$text =~ s{ ^(.+)[ \t]*\n-+[ \t]*\n+ }{
+		""  .  _RunSpanGamut($1)  .  "\n\n";
+	}egmx;
+
+
+	# atx-style headers:
+	#	# Header 1
+	#	## Header 2
+	#	## Header 2 with closing hashes ##
+	#	...
+	#	###### Header 6
+	#
+	$text =~ s{
+			^(\#{1,6})	# $1 = string of #'s
+			[ \t]*
+			(.+?)		# $2 = Header text
+			[ \t]*
+			\#*			# optional closing #'s (not counted)
+			\n+
+		}{
+			my $h_level = length($1);
+			""  .  _RunSpanGamut($2)  .  "\n\n";
+		}egmx;
+
+	return $text;
+}
+
+
+sub _DoLists {
+#
+# Form HTML ordered (numbered) and unordered (bulleted) lists.
+#
+	my $text = shift;
+	my $less_than_tab = $g_tab_width - 1;
+
+	# Re-usable patterns to match list item bullets and number markers:
+	my $marker_ul  = qr/[*+-]/;
+	my $marker_ol  = qr/\d+[.]/;
+	my $marker_any = qr/(?:$marker_ul|$marker_ol)/;
+
+	# Re-usable pattern to match any entirel ul or ol list:
+	my $whole_list = qr{
+		(								# $1 = whole list
+		  (								# $2
+			[ ]{0,$less_than_tab}
+			(${marker_any})				# $3 = first list item marker
+			[ \t]+
+		  )
+		  (?s:.+?)
+		  (								# $4
+			  \z
+			|
+			  \n{2,}
+			  (?=\S)
+			  (?!						# Negative lookahead for another list item marker
+				[ \t]*
+				${marker_any}[ \t]+
+			  )
+		  )
+		)
+	}mx;
+
+	# We use a different prefix before nested lists than top-level lists.
+	# See extended comment in _ProcessListItems().
+	#
+	# Note: There's a bit of duplication here. My original implementation
+	# created a scalar regex pattern as the conditional result of the test on
+	# $g_list_level, and then only ran the $text =~ s{...}{...}egmx
+	# substitution once, using the scalar as the pattern. This worked,
+	# everywhere except when running under MT on my hosting account at Pair
+	# Networks. There, this caused all rebuilds to be killed by the reaper (or
+	# perhaps they crashed, but that seems incredibly unlikely given that the
+	# same script on the same server ran fine *except* under MT. I've spent
+	# more time trying to figure out why this is happening than I'd like to
+	# admit. My only guess, backed up by the fact that this workaround works,
+	# is that Perl optimizes the substition when it can figure out that the
+	# pattern will never change, and when this optimization isn't on, we run
+	# afoul of the reaper. Thus, the slightly redundant code to that uses two
+	# static s/// patterns rather than one conditional pattern.
+
+	if ($g_list_level) {
+		$text =~ s{
+				^
+				$whole_list
+			}{
+				my $list = $1;
+				my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol";
+				# Turn double returns into triple returns, so that we can make a
+				# paragraph for the last item in a list, if necessary:
+				$list =~ s/\n{2,}/\n\n\n/g;
+				my $result = _ProcessListItems($list, $marker_any);
+				$result = "<$list_type>\n" . $result . "\n";
+				$result;
+			}egmx;
+	}
+	else {
+		$text =~ s{
+				(?:(?<=\n\n)|\A\n?)
+				$whole_list
+			}{
+				my $list = $1;
+				my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol";
+				# Turn double returns into triple returns, so that we can make a
+				# paragraph for the last item in a list, if necessary:
+				$list =~ s/\n{2,}/\n\n\n/g;
+				my $result = _ProcessListItems($list, $marker_any);
+				$result = "<$list_type>\n" . $result . "\n";
+				$result;
+			}egmx;
+	}
+
+
+	return $text;
+}
+
+
+sub _ProcessListItems {
+#
+#	Process the contents of a single ordered or unordered list, splitting it
+#	into individual list items.
+#
+
+	my $list_str = shift;
+	my $marker_any = shift;
+
+
+	# The $g_list_level global keeps track of when we're inside a list.
+	# Each time we enter a list, we increment it; when we leave a list,
+	# we decrement. If it's zero, we're not in a list anymore.
+	#
+	# We do this because when we're not inside a list, we want to treat
+	# something like this:
+	#
+	#		I recommend upgrading to version
+	#		8. Oops, now this line is treated
+	#		as a sub-list.
+	#
+	# As a single paragraph, despite the fact that the second line starts
+	# with a digit-period-space sequence.
+	#
+	# Whereas when we're inside a list (or sub-list), that line will be
+	# treated as the start of a sub-list. What a kludge, huh? This is
+	# an aspect of Markdown's syntax that's hard to parse perfectly
+	# without resorting to mind-reading. Perhaps the solution is to
+	# change the syntax rules such that sub-lists must start with a
+	# starting cardinal number; e.g. "1." or "a.".
+
+	$g_list_level++;
+
+	# trim trailing blank lines:
+	$list_str =~ s/\n{2,}\z/\n/;
+
+
+	$list_str =~ s{
+		(\n)?							# leading line = $1
+		(^[ \t]*)						# leading whitespace = $2
+		($marker_any) [ \t]+			# list marker = $3
+		((?s:.+?)						# list item text   = $4
+		(\n{1,2}))
+		(?= \n* (\z | \2 ($marker_any) [ \t]+))
+	}{
+		my $item = $4;
+		my $leading_line = $1;
+		my $leading_space = $2;
+
+		if ($leading_line or ($item =~ m/\n{2,}/)) {
+			$item = _RunBlockGamut(_Outdent($item));
+		}
+		else {
+			# Recursion for sub-lists:
+			$item = _DoLists(_Outdent($item));
+			chomp $item;
+			$item = _RunSpanGamut($item);
+		}
+
+		"" . $item . "\n";
+	}egmx;
+
+	$g_list_level--;
+	return $list_str;
+}
+
+
+
+sub _DoCodeBlocks {
+#
+#	Process Markdown `` blocks.
+#	
+
+	my $text = shift;
+
+	$text =~ s{
+			(?:\n\n|\A)
+			(	            # $1 = the code block -- one or more lines, starting with a space/tab
+			  (?:
+			    (?:[ ]{$g_tab_width} | \t)  # Lines must start with a tab or a tab-width of spaces
+			    .*\n+
+			  )+
+			)
+			((?=^[ ]{0,$g_tab_width}\S)|\Z)	# Lookahead for non-space at line-start, or end of doc
+		}{
+			my $codeblock = $1;
+			my $result; # return value
+
+			$codeblock = _EncodeCode(_Outdent($codeblock));
+			$codeblock = _Detab($codeblock);
+			$codeblock =~ s/\A\n+//; # trim leading newlines
+			$codeblock =~ s/\s+\z//; # trim trailing whitespace
+
+			$result = "\n\n" . $codeblock . "\n\n\n";
+
+			$result;
+		}egmx;
+
+	return $text;
+}
+
+
+sub _DoCodeSpans {
+#
+# 	*	Backtick quotes are used for  spans.
+# 
+# 	*	You can use multiple backticks as the delimiters if you want to
+# 		include literal backticks in the code span. So, this input:
+#     
+#         Just type ``foo `bar` baz`` at the prompt.
+#     
+#     	Will translate to:
+#     
+#         Just type foo `bar` baz at the prompt.
+#     
+#		There's no arbitrary limit to the number of backticks you
+#		can use as delimters. If you need three consecutive backticks
+#		in your code, use four for delimiters, etc.
+#
+#	*	You can use spaces to get literal backticks at the edges:
+#     
+#         ... type `` `bar` `` ...
+#     
+#     	Turns to:
+#     
+#         ... type `bar` ...
+#
+
+	my $text = shift;
+
+	$text =~ s@
+			(`+)		# $1 = Opening run of `
+			(.+?)		# $2 = The code block
+			(?$c";
+		@egsx;
+
+	return $text;
+}
+
+
+sub _EncodeCode {
+#
+# Encode/escape certain characters inside Markdown code runs.
+# The point is that in code, these characters are literals,
+# and lose their special Markdown meanings.
+#
+    local $_ = shift;
+
+	# Encode all ampersands; HTML entities are not
+	# entities within a Markdown code span.
+	s/&/&/g;
+
+	# Encode $'s, but only if we're running under Blosxom.
+	# (Blosxom interpolates Perl variables in article bodies.)
+	{
+		no warnings 'once';
+    	if (defined($blosxom::version)) {
+    		s/\$/$/g;	
+    	}
+    }
+
+
+	# Do the angle bracket song and dance:
+	s! <  !<!gx;
+	s! >  !>!gx;
+
+	# Now, escape characters that are magic in Markdown:
+	s! \* !$g_escape_table{'*'}!gx;
+	s! _  !$g_escape_table{'_'}!gx;
+	s! {  !$g_escape_table{'{'}!gx;
+	s! }  !$g_escape_table{'}'}!gx;
+	s! \[ !$g_escape_table{'['}!gx;
+	s! \] !$g_escape_table{']'}!gx;
+	s! \\ !$g_escape_table{'\\'}!gx;
+
+	return $_;
+}
+
+
+sub _DoItalicsAndBold {
+	my $text = shift;
+
+	#  must go first:
+	$text =~ s{ (\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1 }
+		{$2}gsx;
+
+	$text =~ s{ (\*|_) (?=\S) (.+?) (?<=\S) \1 }
+		{$2}gsx;
+
+	return $text;
+}
+
+
+sub _DoBlockQuotes {
+	my $text = shift;
+
+	$text =~ s{
+		  (								# Wrap whole match in $1
+			(
+			  ^[ \t]*>[ \t]?			# '>' at the start of a line
+			    .+\n					# rest of the first line
+			  (.+\n)*					# subsequent consecutive lines
+			  \n*						# blanks
+			)+
+		  )
+		}{
+			my $bq = $1;
+			$bq =~ s/^[ \t]*>[ \t]?//gm;	# trim one level of quoting
+			$bq =~ s/^[ \t]+$//mg;			# trim whitespace-only lines
+			$bq = _RunBlockGamut($bq);		# recurse
+
+			$bq =~ s/^/  /g;
+			# These leading spaces screw with  content, so we need to fix that:
+			$bq =~ s{
+					(\s*.+?)
+				}{
+					my $pre = $1;
+					$pre =~ s/^  //mg;
+					$pre;
+				}egsx;
+
+			"\n$bq\n\n\n";
+		}egmx;
+
+
+	return $text;
+}
+
+
+sub _FormParagraphs {
+#
+#	Params:
+#		$text - string to process with html  tags
+#
+	my $text = shift;
+
+	# Strip leading and trailing lines:
+	$text =~ s/\A\n+//;
+	$text =~ s/\n+\z//;
+
+	my @grafs = split(/\n{2,}/, $text);
+
+	#
+	# Wrap 
 tags.
+	#
+	foreach (@grafs) {
+		unless (defined( $g_html_blocks{$_} )) {
+			$_ = _RunSpanGamut($_);
+			s/^([ \t]*)/
/;
+			$_ .= "";
+		}
+	}
+
+	#
+	# Unhashify HTML blocks
+	#
+	foreach (@grafs) {
+		if (defined( $g_html_blocks{$_} )) {
+			$_ = $g_html_blocks{$_};
+		}
+	}
+
+	return join "\n\n", @grafs;
+}
+
+
+sub _EncodeAmpsAndAngles {
+# Smart processing for ampersands and angle brackets that need to be encoded.
+
+	my $text = shift;
+
+	# Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
+	#   http://bumppo.net/projects/amputator/
+ 	$text =~ s/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/&/g;
+
+	# Encode naked <'s
+ 	$text =~ s{<(?![a-z/?\$!])}{<}gi;
+
+	return $text;
+}
+
+
+sub _EncodeBackslashEscapes {
+#
+#   Parameter:  String.
+#   Returns:    The string, with after processing the following backslash
+#               escape sequences.
+#
+    local $_ = shift;
+
+    s! \\\\  !$g_escape_table{'\\'}!gx;		# Must process escaped backslashes first.
+    s! \\`   !$g_escape_table{'`'}!gx;
+    s! \\\*  !$g_escape_table{'*'}!gx;
+    s! \\_   !$g_escape_table{'_'}!gx;
+    s! \\\{  !$g_escape_table{'{'}!gx;
+    s! \\\}  !$g_escape_table{'}'}!gx;
+    s! \\\[  !$g_escape_table{'['}!gx;
+    s! \\\]  !$g_escape_table{']'}!gx;
+    s! \\\(  !$g_escape_table{'('}!gx;
+    s! \\\)  !$g_escape_table{')'}!gx;
+    s! \\>   !$g_escape_table{'>'}!gx;
+    s! \\\#  !$g_escape_table{'#'}!gx;
+    s! \\\+  !$g_escape_table{'+'}!gx;
+    s! \\\-  !$g_escape_table{'-'}!gx;
+    s! \\\.  !$g_escape_table{'.'}!gx;
+    s{ \\!  }{$g_escape_table{'!'}}gx;
+
+    return $_;
+}
+
+
+sub _DoAutoLinks {
+	my $text = shift;
+
+	$text =~ s{<((https?|ftp):[^'">\s]+)>}{$1}gi;
+
+	# Email addresses: 
+	$text =~ s{
+		<
+        (?:mailto:)?
+		(
+			[-.\w]+
+			\@
+			[-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
+		)
+		>
+	}{
+		_EncodeEmailAddress( _UnescapeSpecialChars($1) );
+	}egix;
+
+	return $text;
+}
+
+
+sub _EncodeEmailAddress {
+#
+#	Input: an email address, e.g. "foo@example.com"
+#
+#	Output: the email address as a mailto link, with each character
+#		of the address encoded as either a decimal or hex entity, in
+#		the hopes of foiling most address harvesting spam bots. E.g.:
+#
+#	  foo
+#       @example.com
+#
+#	Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
+#	mailing list: 
+#
+
+	my $addr = shift;
+
+	srand;
+	my @encode = (
+		sub { '&#' .                 ord(shift)   . ';' },
+		sub { '&#x' . sprintf( "%X", ord(shift) ) . ';' },
+		sub {                            shift          },
+	);
+
+	$addr = "mailto:" . $addr;
+
+	$addr =~ s{(.)}{
+		my $char = $1;
+		if ( $char eq '@' ) {
+			# this *must* be encoded. I insist.
+			$char = $encode[int rand 1]->($char);
+		} elsif ( $char ne ':' ) {
+			# leave ':' alone (to spot mailto: later)
+			my $r = rand;
+			# roughly 10% raw, 45% hex, 45% dec
+			$char = (
+				$r > .9   ?  $encode[2]->($char)  :
+				$r < .45  ?  $encode[1]->($char)  :
+							 $encode[0]->($char)
+			);
+		}
+		$char;
+	}gex;
+
+	$addr = qq{$addr};
+	$addr =~ s{">.+?:}{">}; # strip the mailto: from the visible part
+
+	return $addr;
+}
+
+
+sub _UnescapeSpecialChars {
+#
+# Swap back in all the special characters we've hidden.
+#
+	my $text = shift;
+
+	while( my($char, $hash) = each(%g_escape_table) ) {
+		$text =~ s/$hash/$char/g;
+	}
+    return $text;
+}
+
+
+sub _TokenizeHTML {
+#
+#   Parameter:  String containing HTML markup.
+#   Returns:    Reference to an array of the tokens comprising the input
+#               string. Each token is either a tag (possibly with nested,
+#               tags contained therein, such as , or a
+#               run of text between tags. Each element of the array is a
+#               two-element array; the first is either 'tag' or 'text';
+#               the second is the actual value.
+#
+#
+#   Derived from the _tokenize() subroutine from Brad Choate's MTRegex plugin.
+#       
+#
+
+    my $str = shift;
+    my $pos = 0;
+    my $len = length $str;
+    my @tokens;
+
+    my $depth = 6;
+    my $nested_tags = join('|', ('(?:<[a-z/!$](?:[^<>]') x $depth) . (')*>)' x  $depth);
+    my $match = qr/(?s:  ) |  # comment
+                   (?s: <\? .*? \?> ) |              # processing instruction
+                   $nested_tags/ix;                   # nested tags
+
+    while ($str =~ m/($match)/g) {
+        my $whole_tag = $1;
+        my $sec_start = pos $str;
+        my $tag_start = $sec_start - length $whole_tag;
+        if ($pos < $tag_start) {
+            push @tokens, ['text', substr($str, $pos, $tag_start - $pos)];
+        }
+        push @tokens, ['tag', $whole_tag];
+        $pos = pos $str;
+    }
+    push @tokens, ['text', substr($str, $pos, $len - $pos)] if $pos < $len;
+    \@tokens;
+}
+
+
+sub _Outdent {
+#
+# Remove one level of line-leading tabs or spaces
+#
+	my $text = shift;
+
+	$text =~ s/^(\t|[ ]{1,$g_tab_width})//gm;
+	return $text;
+}
+
+
+sub _Detab {
+#
+# Cribbed from a post by Bart Lateur:
+# 
+#
+	my $text = shift;
+
+	$text =~ s{(.*?)\t}{$1.(' ' x ($g_tab_width - length($1) % $g_tab_width))}ge;
+	return $text;
+}
+
+
+1;
+
+__END__
+
+
+=pod
+
+=head1 NAME
+
+B
+
+
+=head1 SYNOPSIS
+
+B [ B<--html4tags> ] [ B<--version> ] [ B<-shortversion> ]
+    [ I ... ]
+
+
+=head1 DESCRIPTION
+
+Markdown is a text-to-HTML filter; it translates an easy-to-read /
+easy-to-write structured text format into HTML. Markdown's text format
+is most similar to that of plain text email, and supports features such
+as headers, *emphasis*, code blocks, blockquotes, and links.
+
+Markdown's syntax is designed not as a generic markup language, but
+specifically to serve as a front-end to (X)HTML. You can  use span-level
+HTML tags anywhere in a Markdown document, and you can use block level
+HTML tags (like  and  as well).
+
+For more information about Markdown's syntax, see:
+
+    http://daringfireball.net/projects/markdown/
+
+
+=head1 OPTIONS
+
+Use "--" to end switch parsing. For example, to open a file named "-z", use:
+
+	Markdown.pl -- -z
+
+=over 4
+
+
+=item B<--html4tags>
+
+Use HTML 4 style for empty element tags, e.g.:
+
+    

+
+instead of Markdown's default XHTML style tags, e.g.:
+
+    

+
+
+=item B<-v>, B<--version>
+
+Display Markdown's version number and copyright information.
+
+
+=item B<-s>, B<--shortversion>
+
+Display the short-form version number.
+
+
+=back
+
+
+
+=head1 BUGS
+
+To file bug reports or feature requests (other than topics listed in the
+Caveats section above) please send email to:
+
+    support@daringfireball.net
+
+Please include with your report: (1) the example input; (2) the output
+you expected; (3) the output Markdown actually produced.
+
+
+=head1 VERSION HISTORY
+
+See the readme file for detailed release notes for this version.
+
+1.0.1 - 14 Dec 2004
+
+1.0 - 28 Aug 2004
+
+
+=head1 AUTHOR
+
+    John Gruber
+    http://daringfireball.net
+
+    PHP port and other contributions by Michel Fortin
+    http://michelf.com
+
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright (c) 2003-2004 John Gruber   
+   
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright notice,
+  this list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright
+  notice, this list of conditions and the following disclaimer in the
+  documentation and/or other materials provided with the distribution.
+
+* Neither the name "Markdown" nor the names of its contributors may
+  be used to endorse or promote products derived from this software
+  without specific prior written permission.
+
+This software is provided by the copyright holders and contributors "as
+is" and any express or implied warranties, including, but not limited
+to, the implied warranties of merchantability and fitness for a
+particular purpose are disclaimed. In no event shall the copyright owner
+or contributors be liable for any direct, indirect, incidental, special,
+exemplary, or consequential damages (including, but not limited to,
+procurement of substitute goods or services; loss of use, data, or
+profits; or business interruption) however caused and on any theory of
+liability, whether in contract, strict liability, or tort (including
+negligence or otherwise) arising in any way out of the use of this
+software, even if advised of the possibility of such damage.
+
+=cut
diff --git a/filters/html-converters/resources/rst-template.txt b/filters/html-converters/resources/rst-template.txt
new file mode 100644
index 0000000..43cde42
--- /dev/null
+++ b/filters/html-converters/resources/rst-template.txt
@@ -0,0 +1,4 @@
+%(stylesheet)s
+%(body_pre_docinfo)s
+%(docinfo)s
+%(body)s
diff --git a/filters/html-converters/rst2html b/filters/html-converters/rst2html
new file mode 100755
index 0000000..c51f5be
--- /dev/null
+++ b/filters/html-converters/rst2html
@@ -0,0 +1,2 @@
+#!/bin/sh
+rst2html.py --template="$(dirname $0)/resources/rst-template.txt"
diff --git a/filters/html-converters/txt2html b/filters/html-converters/txt2html
new file mode 100755
index 0000000..a795995
--- /dev/null
+++ b/filters/html-converters/txt2html
@@ -0,0 +1,4 @@
+#!/bin/sh
+echo ""
+cat
+echo ""
diff --git a/filters/syntax-highlighting.py b/filters/syntax-highlighting.py
new file mode 100755
index 0000000..dcdba03
--- /dev/null
+++ b/filters/syntax-highlighting.py
@@ -0,0 +1,38 @@
+#!/usr/bin/env python3
+
+# This script uses Pygments and Python3. You must have both installed for this to work.
+# http://pygments.org/
+# http://python.org/
+#
+# It may be used with the source-filter or repo.source-filter settings in cgitrc.
+#
+# The following environment variables can be used to retrieve the configuration
+# of the repository for which this script is called:
+# CGIT_REPO_URL        ( = repo.url       setting )
+# CGIT_REPO_NAME       ( = repo.name      setting )
+# CGIT_REPO_PATH       ( = repo.path      setting )
+# CGIT_REPO_OWNER      ( = repo.owner     setting )
+# CGIT_REPO_DEFBRANCH  ( = repo.defbranch setting )
+# CGIT_REPO_SECTION    ( = section        setting )
+# CGIT_REPO_CLONE_URL  ( = repo.clone-url setting )
+
+
+import sys
+import cgi
+import codecs
+from pygments.lexers import get_lexer_for_filename
+from pygments import highlight
+from pygments.formatters import HtmlFormatter
+
+sys.stdin = codecs.getreader("utf-8")(sys.stdin.detach())
+doc = sys.stdin.read()
+try:
+	lexer = get_lexer_for_filename(sys.argv[1])
+	formatter = HtmlFormatter(style='pastie')
+	sys.stdout.write("")
+
+	highlight(doc, lexer, formatter, sys.stdout)
+except:
+	sys.stdout.write(str(cgi.escape(doc).encode("ascii", "xmlcharrefreplace"), "ascii"))
-- 
cgit v1.2.3-54-g00ecf