├── Contents
├── Info.plist
├── MacOS
│ └── CodaScriptPlugIn
└── Resources
│ ├── 4AD4E35C-363A-4692-B7F6-A0262DE88EA0
│ ├── Markdown.pl
│ ├── Support Files
│ │ └── Markdown License
│ └── input.php
│ ├── 6F1582DA-902E-4A8A-81E8-2B47BB603321
│ ├── Markdown.pl
│ ├── Support Files
│ │ └── Markdown License
│ └── input.php
│ ├── Dutch.lproj
│ └── Localizable.strings
│ ├── English.lproj
│ └── Localizable.strings
│ └── Japanese.lproj
│ └── Localizable.strings
└── README.md
/Contents/Info.plist:
--------------------------------------------------------------------------------
1 |
2 |
3 |
s around 357 | # "paragraphs" that are wrapped in non-block-level tags, such as anchors, 358 | # phrase emphasis, and spans. The list of tags we're looking for is 359 | # hard-coded: 360 | my $block_tags_a = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del/; 361 | my $block_tags_b = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math/; 362 | 363 | # First, look for nested blocks, e.g.: 364 | #
tags.
530 | # my $tags_to_skip = qr!<(/?)(?:pre|code|kbd|script|math)[\s>]!;
531 |
532 | foreach my $cur_token (@$tokens) {
533 | if ($cur_token->[0] eq "tag") {
534 | # Within tags, encode * and _ so they don't conflict
535 | # with their use in Markdown for italics and strong.
536 | # We're replacing each such character with its
537 | # corresponding MD5 checksum value; this is likely
538 | # overkill, but it should prevent us from colliding
539 | # with the escape values by accident.
540 | $cur_token->[1] =~ s! \* !$g_escape_table{'*'}!gx;
541 | $cur_token->[1] =~ s! _ !$g_escape_table{'_'}!gx;
542 | $text .= $cur_token->[1];
543 | } else {
544 | my $t = $cur_token->[1];
545 | $t = _EncodeBackslashEscapes($t);
546 | $text .= $t;
547 | }
548 | }
549 | return $text;
550 | }
551 |
552 |
553 | sub _DoAnchors {
554 | #
555 | # Turn Markdown link shortcuts into XHTML tags.
556 | #
557 | my $text = shift;
558 |
559 | #
560 | # First, handle reference-style links: [link text] [id]
561 | #
562 | $text =~ s{
563 | ( # wrap whole match in $1
564 | \[
565 | ($g_nested_brackets) # link text = $2
566 | \]
567 |
568 | [ ]? # one optional space
569 | (?:\n[ ]*)? # one optional newline followed by spaces
570 |
571 | \[
572 | (.*?) # id = $3
573 | \]
574 | )
575 | }{
576 | my $result;
577 | my $whole_match = $1;
578 | my $link_text = $2;
579 | my $link_id = lc $3;
580 |
581 | if ($link_id eq "") {
582 | $link_id = lc $link_text; # for shortcut links like [this][].
583 | }
584 |
585 | if (defined $g_urls{$link_id}) {
586 | my $url = $g_urls{$link_id};
587 | $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid
588 | $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold.
589 | $result = "? # href = $3
615 | [ \t]*
616 | ( # $4
617 | (['"]) # quote char = $5
618 | (.*?) # Title = $6
619 | \5 # matching quote
620 | )? # title is optional
621 | \)
622 | )
623 | }{
624 | my $result;
625 | my $whole_match = $1;
626 | my $link_text = $2;
627 | my $url = $3;
628 | my $title = $6;
629 |
630 | $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid
631 | $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold.
632 | $result = " tags.
653 | #
654 | my $text = shift;
655 |
656 | #
657 | # First, handle reference-style labeled images: ![alt text][id]
658 | #
659 | $text =~ s{
660 | ( # wrap whole match in $1
661 | !\[
662 | (.*?) # alt text = $2
663 | \]
664 |
665 | [ ]? # one optional space
666 | (?:\n[ ]*)? # one optional newline followed by spaces
667 |
668 | \[
669 | (.*?) # id = $3
670 | \]
671 |
672 | )
673 | }{
674 | my $result;
675 | my $whole_match = $1;
676 | my $alt_text = $2;
677 | my $link_id = lc $3;
678 |
679 | if ($link_id eq "") {
680 | $link_id = lc $alt_text; # for shortcut links like ![this][].
681 | }
682 |
683 | $alt_text =~ s/"/"/g;
684 | if (defined $g_urls{$link_id}) {
685 | my $url = $g_urls{$link_id};
686 | $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid
687 | $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold.
688 | $result = "
? # src url = $3
717 | [ \t]*
718 | ( # $4
719 | (['"]) # quote char = $5
720 | (.*?) # title = $6
721 | \5 # matching quote
722 | [ \t]*
723 | )? # title is optional
724 | \)
725 | )
726 | }{
727 | my $result;
728 | my $whole_match = $1;
729 | my $alt_text = $2;
730 | my $url = $3;
731 | my $title = '';
732 | if (defined($6)) {
733 | $title = $6;
734 | }
735 |
736 | $alt_text =~ s/"/"/g;
737 | $title =~ s/"/"/g;
738 | $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid
739 | $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold.
740 | $result = "
" . _RunSpanGamut($1) . "\n\n";
767 | }egmx;
768 |
769 | $text =~ s{ ^(.+)[ \t]*\n-+[ \t]*\n+ }{
770 | "" . _RunSpanGamut($1) . "
\n\n";
771 | }egmx;
772 |
773 |
774 | # atx-style headers:
775 | # # Header 1
776 | # ## Header 2
777 | # ## Header 2 with closing hashes ##
778 | # ...
779 | # ###### Header 6
780 | #
781 | $text =~ s{
782 | ^(\#{1,6}) # $1 = string of #'s
783 | [ \t]*
784 | (.+?) # $2 = Header text
785 | [ \t]*
786 | \#* # optional closing #'s (not counted)
787 | \n+
788 | }{
789 | my $h_level = length($1);
790 | "" . _RunSpanGamut($2) . " \n\n";
791 | }egmx;
792 |
793 | return $text;
794 | }
795 |
796 |
sub _DoLists {
#
# Form HTML ordered (numbered) and unordered (bulleted) lists.
#
# Parameter:  the text to scan for Markdown lists.
# Returns:    the text with every list replaced by "<ul>...</ul>" or
#             "<ol>...</ol>" markup; the items in between are produced
#             by _ProcessListItems().
#
    my $text = shift;
    my $less_than_tab = $g_tab_width - 1;

    # Re-usable patterns to match list item bullets and number markers:
    my $marker_ul  = qr/[*+-]/;
    my $marker_ol  = qr/\d+[.]/;
    my $marker_any = qr/(?:$marker_ul|$marker_ol)/;

    # Re-usable pattern to match any entire ul or ol list:
    my $whole_list = qr{
        (                               # $1 = whole list
          (                             # $2
            [ ]{0,$less_than_tab}
            (${marker_any})             # $3 = first list item marker
            [ \t]+
          )
          (?s:.+?)
          (                             # $4
              \z
            |
              \n{2,}
              (?=\S)
              (?!                       # Negative lookahead for another list item marker
                [ \t]*
                ${marker_any}[ \t]+
              )
          )
        )
    }mx;

    # Shared replacement logic for both substitutions below. The arguments
    # are copied out of @_ immediately because they alias $1/$3, which the
    # regex matches further down would otherwise clobber.
    my $render_list = sub {
        my ($list, $first_marker) = @_;
        my $list_type = ($first_marker =~ m/$marker_ul/) ? "ul" : "ol";

        # Turn double returns into triple returns, so that we can make a
        # paragraph for the last item in a list, if necessary:
        $list =~ s/\n{2,}/\n\n\n/g;

        my $items = _ProcessListItems($list, $marker_any);
        return "<$list_type>\n" . $items . "</$list_type>\n";
    };

    # We use a different prefix before nested lists than top-level lists.
    # See extended comment in _ProcessListItems().
    #
    # Note: There's a bit of duplication here. The original implementation
    # built a single scalar regex pattern from the test on $g_list_level
    # and ran the $text =~ s{...}{...}egmx substitution once with it. That
    # worked everywhere except under MT on one hosting account, where all
    # rebuilds were killed by the reaper. The only guess, backed up by the
    # fact that this workaround works, is that Perl optimizes the
    # substitution when it can tell the pattern will never change, and
    # without that optimization the script ran afoul of the reaper. Thus
    # the slightly redundant code that uses two static s/// patterns
    # rather than one conditional pattern.

    if ($g_list_level) {
        # Inside a list: a nested list may start at the beginning of any line.
        $text =~ s{
                ^
                $whole_list
            }{
                $render_list->($1, $3);
            }egmx;
    }
    else {
        # Top level: a list must follow a blank line or start the document.
        $text =~ s{
                (?:(?<=\n\n)|\A\n?)
                $whole_list
            }{
                $render_list->($1, $3);
            }egmx;
    }

    return $text;
}
883 |
884 |
sub _ProcessListItems {
#
# Process the contents of a single ordered or unordered list, splitting it
# into individual list items.
#
# Parameters: $list_str   - the text of one whole list
#             $marker_any - compiled pattern matching any list item marker
# Returns:    the list text with every item wrapped in <li>...</li>
#

    my $list_str   = shift;
    my $marker_any = shift;

    # The $g_list_level global keeps track of when we're inside a list.
    # Each time we enter a list, we increment it; when we leave a list,
    # we decrement. If it's zero, we're not in a list anymore.
    #
    # We do this because when we're not inside a list, we want to treat
    # something like this:
    #
    #       I recommend upgrading to version
    #       8. Oops, now this line is treated
    #       as a sub-list.
    #
    # As a single paragraph, despite the fact that the second line starts
    # with a digit-period-space sequence.
    #
    # Whereas when we're inside a list (or sub-list), that line will be
    # treated as the start of a sub-list. What a kludge, huh? This is
    # an aspect of Markdown's syntax that's hard to parse perfectly
    # without resorting to mind-reading. Perhaps the solution is to
    # change the syntax rules such that sub-lists must start with a
    # starting cardinal number; e.g. "1." or "a.".

    $g_list_level++;

    # trim trailing blank lines:
    $list_str =~ s/\n{2,}\z/\n/;

    $list_str =~ s{
        (\n)?                           # leading line = $1
        (^[ \t]*)                       # leading whitespace = $2
        ($marker_any) [ \t]+            # list marker = $3
        ((?s:.+?)                       # list item text = $4
        (\n{1,2}))
        (?= \n* (\z | \2 ($marker_any) [ \t]+))
    }{
        my $item         = $4;
        my $leading_line = $1;

        if ($leading_line or ($item =~ m/\n{2,}/)) {
            # The item is preceded by, or contains, a blank line: treat its
            # content as block-level text.
            $item = _RunBlockGamut(_Outdent($item));
        }
        else {
            # Recursion for sub-lists:
            $item = _DoLists(_Outdent($item));
            chomp $item;
            $item = _RunSpanGamut($item);
        }

        "<li>" . $item . "</li>\n";
    }egmx;

    $g_list_level--;
    return $list_str;
}
950 |
951 |
952 |
sub _DoCodeBlocks {
#
# Process Markdown `<pre><code>` blocks.
#
# Parameter:  the text to scan for indented code blocks.
# Returns:    the text with each code block outdented, encoded with
#             _EncodeCode(), detabbed and wrapped in <pre><code>...</code></pre>.
#

    my $text = shift;

    $text =~ s{
            (?:\n\n|\A)
            (                   # $1 = the code block -- one or more lines, starting with a space/tab
              (?:
                (?:[ ]{$g_tab_width} | \t)   # Lines must start with a tab or a tab-width of spaces
                .*\n+
              )+
            )
            ((?=^[ ]{0,$g_tab_width}\S)|\Z)  # Lookahead for non-space at line-start, or end of doc
        }{
            my $codeblock = $1;

            $codeblock = _EncodeCode(_Outdent($codeblock));
            $codeblock = _Detab($codeblock);
            $codeblock =~ s/\A\n+//;    # trim leading newlines
            $codeblock =~ s/\s+\z//;    # trim trailing whitespace

            "\n\n<pre><code>" . $codeblock . "\n</code></pre>\n\n";
        }egmx;

    return $text;
}
985 |
986 |
sub _DoCodeSpans {
#
#   *   Backtick quotes are used for <code></code> spans.
#
#   *   You can use multiple backticks as the delimiters if you want to
#       include literal backticks in the code span. So, this input:
#
#         Just type ``foo `bar` baz`` at the prompt.
#
#       Will translate to:
#
#         <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
#
#       There's no arbitrary limit to the number of backticks you
#       can use as delimiters. If you need three consecutive backticks
#       in your code, use four for delimiters, etc.
#
#   *   You can use spaces to get literal backticks at the edges:
#
#         ... type `` `bar` `` ...
#
#       Turns to:
#
#         ... type <code>`bar`</code> ...
#
# Parameter:  the text to scan for backtick-delimited spans.
# Returns:    the text with each span encoded via _EncodeCode() and wrapped
#             in <code>...</code>.
#

    my $text = shift;

    $text =~ s{
            (`+)        # $1 = Opening run of `
            (.+?)       # $2 = The code block
            (?<!`)
            \1          # Matching closer
            (?!`)
        }{
            my $c = "$2";
            $c =~ s/^[ \t]*//g;     # leading whitespace
            $c =~ s/[ \t]*$//g;     # trailing whitespace
            $c = _EncodeCode($c);
            "<code>$c</code>";
        }egsx;

    return $text;
}
1031 |
1032 |
sub _EncodeCode {
#
# Encode/escape certain characters inside Markdown code runs.
# The point is that in code, these characters are literals,
# and lose their special Markdown meanings.
#
# Parameter:  the raw text of a code span or code block.
# Returns:    the text with &, < and > turned into entities and the
#             Markdown-significant characters replaced by their
#             %g_escape_table values.
#
    local $_ = shift;

    # Encode all ampersands; HTML entities are not
    # entities within a Markdown code span.
    s/&/&amp;/g;

    # Encode $'s, but only if we're running under Blosxom.
    # (Blosxom interpolates Perl variables in article bodies.)
    {
        no warnings 'once';
        if (defined($blosxom::version)) {
            s/\$/&#036;/g;
        }
    }

    # Do the angle bracket song and dance:
    s! < !&lt;!gx;
    s! > !&gt;!gx;

    # Now, escape characters that are magic in Markdown:
    s! \* !$g_escape_table{'*'}!gx;
    s! _  !$g_escape_table{'_'}!gx;
    s! {  !$g_escape_table{'{'}!gx;
    s! }  !$g_escape_table{'}'}!gx;
    s! \[ !$g_escape_table{'['}!gx;
    s! \] !$g_escape_table{']'}!gx;
    s! \\ !$g_escape_table{'\\'}!gx;

    return $_;
}
1070 |
1071 |
sub _DoItalicsAndBold {
#
# Turn Markdown emphasis markers into HTML.
#
# Parameter:  the text to process.
# Returns:    the text with **x** / __x__ rewritten as <strong>x</strong>
#             and *x* / _x_ rewritten as <em>x</em>.
#
    my $text = shift;

    # <strong> must go first, otherwise the single-character pattern below
    # would consume half of each doubled delimiter:
    $text =~ s{ (\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1 }
        {<strong>$2</strong>}gsx;

    $text =~ s{ (\*|_) (?=\S) (.+?) (?<=\S) \1 }
        {<em>$2</em>}gsx;

    return $text;
}
1084 |
1085 |
sub _DoBlockQuotes {
#
# Turn runs of '>'-prefixed lines into <blockquote> elements.
#
# Parameter:  the text to process.
# Returns:    the text with each quoted run stripped of one level of
#             quoting, passed through _RunBlockGamut() and wrapped in
#             <blockquote>...</blockquote>.
#
    my $text = shift;

    $text =~ s{
          (                             # Wrap whole match in $1
            (
              ^[ \t]*>[ \t]?            # '>' at the start of a line
                .+\n                    # rest of the first line
              (.+\n)*                   # subsequent consecutive lines
              \n*                       # blanks
            )+
          )
        }{
            my $bq = $1;
            $bq =~ s/^[ \t]*>[ \t]?//gm;    # trim one level of quoting
            $bq =~ s/^[ \t]+$//mg;          # trim whitespace-only lines
            $bq = _RunBlockGamut($bq);      # recurse

            # NOTE(review): without /m this only indents the first line,
            # while the <pre> fix-up below strips per line -- confirm
            # whether /mg was intended.
            $bq =~ s/^/ /g;
            # These leading spaces screw with <pre> content, so we need to fix that:
            $bq =~ s{
                    (\s*<pre>.+?</pre>)
                }{
                    my $pre = $1;
                    $pre =~ s/^ //mg;
                    $pre;
                }egsx;

            "<blockquote>\n$bq\n</blockquote>\n\n";
        }egmx;

    return $text;
}
1120 |
1121 |
sub _FormParagraphs {
#
#   Params:
#       $text - string to process with html <p> tags
#
#   Returns:
#       the paragraphs joined by blank lines; each chunk is either wrapped
#       in <p>...</p> after _RunSpanGamut(), or, when it is a key of
#       %g_html_blocks, replaced by the stored HTML block.
#
    my $text = shift;

    # Strip leading and trailing lines:
    $text =~ s/\A\n+//;
    $text =~ s/\n+\z//;

    my @grafs = split(/\n{2,}/, $text);

    #
    # Wrap <p> tags.
    #
    foreach (@grafs) {
        unless (defined( $g_html_blocks{$_} )) {
            $_ = _RunSpanGamut($_);
            s/^([ \t]*)/<p>/;       # the opening tag replaces any leading whitespace
            $_ .= "</p>";
        }
    }

    #
    # Unhashify HTML blocks
    #
    foreach (@grafs) {
        if (defined( $g_html_blocks{$_} )) {
            $_ = $g_html_blocks{$_};
        }
    }

    return join "\n\n", @grafs;
}
1157 |
1158 |
sub _EncodeAmpsAndAngles {
# Smart processing for ampersands and angle brackets that need to be encoded.
#
# Parameter:  the text to process.
# Returns:    the text with every '&' that does not start an entity
#             replaced by &amp; and every '<' that does not look like the
#             start of a tag replaced by &lt;.

    my $text = shift;

    # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
    #   http://bumppo.net/projects/amputator/
    $text =~ s/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/&amp;/g;

    # Encode naked <'s
    $text =~ s{<(?![a-z/?\$!])}{&lt;}gi;

    return $text;
}
1173 |
1174 |
sub _EncodeBackslashEscapes {
#
#   Parameter:  String.
#   Returns:    The string, with each backslash escape sequence replaced
#               by the %g_escape_table value of the escaped character.
#
    my $escaped = shift;

    # Order matters: escaped backslashes must be processed first.
    my @escapable = (
        '\\', '`', '*', '_', '{', '}', '[', ']',
        '(',  ')', '>', '#', '+', '-', '.', '!',
    );

    for my $char (@escapable) {
        my $sequence = quotemeta('\\' . $char);
        $escaped =~ s/$sequence/$g_escape_table{$char}/g;
    }

    return $escaped;
}
1202 |
1203 |
sub _DoAutoLinks {
#
# Turn <http://...>-style automatic links and <user@host> addresses
# into anchors.
#
# Parameter:  the text to process.
# Returns:    the text with each bracketed http/https/ftp URL wrapped in
#             <a href="URL">URL</a> and each bracketed e-mail address
#             replaced by the result of _EncodeEmailAddress().
#
    my $text = shift;

    $text =~ s{<((https?|ftp):[^'">\s]+)>}{<a href="$1">$1</a>}gi;

    # Email addresses: <address@domain.foo>
    $text =~ s{
        <
        (?:mailto:)?
        (
            [-.\w]+
            \@
            [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
        )
        >
    }{
        _EncodeEmailAddress( _UnescapeSpecialChars($1) );
    }egix;

    return $text;
}
1225 |
1226 |
sub _EncodeEmailAddress {
#
#   Input: an email address, e.g. "foo@example.com"
#
#   Output: the email address as a mailto link, with each character
#       of the address encoded as either a decimal or hex entity, in
#       the hopes of foiling most address harvesting spam bots. The
#       result has the shape
#
#         <a href="ENCODED(mailto:foo@example.com)">ENCODED(foo@example.com)</a>
#
#       where ENCODED(...) is a random mix of decimal entities, hex
#       entities and raw characters.
#
#   Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
#   mailing list.
#

    my $addr = shift;

    # No explicit srand here: Perl seeds the generator on the first call
    # to rand, and re-seeding on every call is unnecessary.
    my @encode = (
        sub { '&#'  . ord(shift) . ';' },                   # decimal entity
        sub { '&#x' . sprintf( "%X", ord(shift) ) . ';' },  # hex entity
        sub { shift },                                      # raw character
    );

    $addr = "mailto:" . $addr;

    $addr =~ s{(.)}{
        my $char = $1;
        if ( $char eq '@' ) {
            # this *must* be encoded. I insist.
            # "int rand 2" picks the decimal or the hex encoder; the
            # previous "int rand 1" always evaluated to 0.
            $char = $encode[int rand 2]->($char);
        }
        elsif ( $char ne ':' ) {
            # leave ':' alone (to spot mailto: later)
            my $r = rand;
            # roughly 10% raw, 45% hex, 45% dec
            $char = (
                $r > .9   ?  $encode[2]->($char)  :
                $r < .45  ?  $encode[1]->($char)  :
                             $encode[0]->($char)
            );
        }
        $char;
    }gex;

    $addr = qq{<a href="$addr">$addr</a>};
    $addr =~ s{">.+?:}{">}; # strip the mailto: from the visible part

    return $addr;
}
1277 |
1278 |
sub _UnescapeSpecialChars {
#
# Swap back in all the special characters we've hidden.
#
# Parameter:  text containing %g_escape_table values.
# Returns:    the text with every such value replaced by the character
#             it is keyed under.
#
    my $restored = shift;

    for my $char (keys %g_escape_table) {
        my $hash = $g_escape_table{$char};
        $restored =~ s/$hash/$char/g;
    }

    return $restored;
}
1290 |
1291 |
sub _TokenizeHTML {
#
#   Parameter:  String containing HTML markup.
#   Returns:    Reference to an array of the tokens comprising the input
#               string. Each token is either a tag (possibly with nested
#               tags contained therein, such as <a href="<MTFoo>">), or a
#               run of text between tags. Each element of the array is a
#               two-element array; the first is either 'tag' or 'text';
#               the second is the actual value.
#
#   HTML comments and processing instructions are returned as single
#   'tag' tokens.
#
#   Derived from the _tokenize() subroutine from Brad Choate's MTRegex plugin.
#

    my $str = shift;
    my $pos = 0;                # end offset of the previous tag
    my $len = length $str;
    my @tokens;

    # Tags may nest (a tag inside another tag's attribute) up to $depth levels.
    my $depth = 6;
    my $nested_tags = join('|', ('(?:<[a-z/!$](?:[^<>]') x $depth) . (')*>)' x $depth);
    my $match = qr/(?s: <! (?: -- .*? -- \s* )+ > ) |   # comment
                   (?s: <\? .*? \?> ) |                 # processing instruction
                   $nested_tags/ix;                     # nested tags

    while ($str =~ m/($match)/g) {
        my $whole_tag = $1;
        my $tag_end   = pos $str;
        my $tag_start = $tag_end - length $whole_tag;

        # Anything between the previous tag and this one is plain text.
        if ($pos < $tag_start) {
            push @tokens, ['text', substr($str, $pos, $tag_start - $pos)];
        }
        push @tokens, ['tag', $whole_tag];
        $pos = $tag_end;
    }

    # Trailing text after the last tag, if any.
    push @tokens, ['text', substr($str, $pos, $len - $pos)] if $pos < $len;
    return \@tokens;
}
1331 |
1332 |
sub _Outdent {
#
# Remove one level of line-leading tabs or spaces: at the start of every
# line, either a single tab or up to $g_tab_width spaces is dropped.
#
# Parameter:  the text to outdent.
# Returns:    the outdented text.
#
    (my $outdented = shift) =~ s/^(?:\t|[ ]{1,$g_tab_width})//gm;
    return $outdented;
}
1342 |
1343 |
sub _Detab {
#
# Expand tabs to spaces, padding each one out to the next multiple of
# $g_tab_width.
#
# Cribbed from a post by Bart Lateur.
#
# Parameter:  the text to detab.
# Returns:    the text with every tab replaced by one or more spaces.
#
    my $text = shift;

    $text =~ s{(.*?)\t}{
        my $before = $1;
        my $pad    = $g_tab_width - length($before) % $g_tab_width;
        $before . (' ' x $pad);
    }ge;

    return $text;
}
1354 |
1355 |
1356 | 1;
1357 |
1358 | __END__
1359 |
1360 |
1361 | =pod
1362 |
1363 | =head1 NAME
1364 |
1365 | B<Markdown>
1366 |
1367 |
1368 | =head1 SYNOPSIS
1369 |
1370 | B<Markdown.pl> [ B<--html4tags> ] [ B<--version> ] [ B<-shortversion> ]
1371 | [ I<file> ... ]
1372 |
1373 |
1374 | =head1 DESCRIPTION
1375 |
1376 | Markdown is a text-to-HTML filter; it translates an easy-to-read /
1377 | easy-to-write structured text format into HTML. Markdown's text format
1378 | is most similar to that of plain text email, and supports features such
1379 | as headers, *emphasis*, code blocks, blockquotes, and links.
1380 |
1381 | Markdown's syntax is designed not as a generic markup language, but
1382 | specifically to serve as a front-end to (X)HTML. You can use span-level
1383 | HTML tags anywhere in a Markdown document, and you can use block level
1384 | HTML tags (like and