├── .gitignore ├── public ├── favicon.png ├── image │ └── baker.png └── css │ └── base.css ├── layout ├── fontawesome.md ├── head.md ├── bootstrap.md ├── author.md ├── disqus.md ├── post.md └── index.md ├── post └── 2015-06-19-hello-world.md ├── README.md ├── baker ├── LICENSE.txt └── Markdown.pl /.gitignore: -------------------------------------------------------------------------------- 1 | out 2 | -------------------------------------------------------------------------------- /public/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorchu/baker/HEAD/public/favicon.png -------------------------------------------------------------------------------- /public/image/baker.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorchu/baker/HEAD/public/image/baker.png -------------------------------------------------------------------------------- /layout/fontawesome.md: -------------------------------------------------------------------------------- 1 | --- 2 | --- 3 | 4 | 5 | -------------------------------------------------------------------------------- /layout/head.md: -------------------------------------------------------------------------------- 1 | --- 2 | --- 3 | 4 | 5 | 6 | 7 | 8 | 9 | @include bootstrap 10 | @include fontawesome 11 | -------------------------------------------------------------------------------- /layout/bootstrap.md: -------------------------------------------------------------------------------- 1 | --- 2 | --- 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /post/2015-06-19-hello-world.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: hello world! 3 | date: 2015-06-19T01:01:56Z 4 | layout: post 5 | draft: false 6 | summary: This is your first post. 7 | --- 8 | 9 | Welcome to Baker. This is your first post. Edit or delete it, then start blogging! 10 | 11 | 12 | -------------------------------------------------------------------------------- /layout/author.md: -------------------------------------------------------------------------------- 1 | --- 2 | --- 3 | 4 |
5 | 6 |Published on
22 |{{ summary }}
23 |
33 | s around 320 | # "paragraphs" that are wrapped in non-block-level tags, such as anchors, 321 | # phrase emphasis, and spans. The list of tags we're looking for is 322 | # hard-coded: 323 | my $block_tags_a = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del/; 324 | my $block_tags_b = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math/; 325 | 326 | # First, look for nested blocks, e.g.: 327 | #
tags.
493 | # my $tags_to_skip = qr!<(/?)(?:pre|code|kbd|script|math)[\s>]!;
494 |
495 | foreach my $cur_token (@$tokens) {
496 | if ($cur_token->[0] eq "tag") {
497 | # Within tags, encode * and _ so they don't conflict
498 | # with their use in Markdown for italics and strong.
499 | # We're replacing each such character with its
500 | # corresponding MD5 checksum value; this is likely
501 | # overkill, but it should prevent us from colliding
502 | # with the escape values by accident.
503 | $cur_token->[1] =~ s! \* !$g_escape_table{'*'}!gx;
504 | $cur_token->[1] =~ s! _ !$g_escape_table{'_'}!gx;
505 | $text .= $cur_token->[1];
506 | } else {
507 | my $t = $cur_token->[1];
508 | $t = _EncodeBackslashEscapes($t);
509 | $text .= $t;
510 | }
511 | }
512 | return $text;
513 | }
514 |
515 |
516 | sub _DoAnchors {
517 | #
518 | # Turn Markdown link shortcuts into XHTML tags.
519 | #
520 | my $text = shift;
521 |
522 | #
523 | # First, handle reference-style links: [link text] [id]
524 | #
525 | $text =~ s{
526 | ( # wrap whole match in $1
527 | \[
528 | ($g_nested_brackets) # link text = $2
529 | \]
530 |
531 | [ ]? # one optional space
532 | (?:\n[ ]*)? # one optional newline followed by spaces
533 |
534 | \[
535 | (.*?) # id = $3
536 | \]
537 | )
538 | }{
539 | my $result;
540 | my $whole_match = $1;
541 | my $link_text = $2;
542 | my $link_id = lc $3;
543 |
544 | if ($link_id eq "") {
545 | $link_id = lc $link_text; # for shortcut links like [this][].
546 | }
547 |
548 | if (defined $g_urls{$link_id}) {
549 | my $url = $g_urls{$link_id};
550 | $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid
551 | $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold.
552 | $result = "? # href = $3
578 | [ \t]*
579 | ( # $4
580 | (['"]) # quote char = $5
581 | (.*?) # Title = $6
582 | \5 # matching quote
583 | )? # title is optional
584 | \)
585 | )
586 | }{
587 | my $result;
588 | my $whole_match = $1;
589 | my $link_text = $2;
590 | my $url = $3;
591 | my $title = $6;
592 |
593 | $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid
594 | $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold.
595 | $result = " tags.
616 | #
617 | my $text = shift;
618 |
619 | #
620 | # First, handle reference-style labeled images: ![alt text][id]
621 | #
622 | $text =~ s{
623 | ( # wrap whole match in $1
624 | !\[
625 | (.*?) # alt text = $2
626 | \]
627 |
628 | [ ]? # one optional space
629 | (?:\n[ ]*)? # one optional newline followed by spaces
630 |
631 | \[
632 | (.*?) # id = $3
633 | \]
634 |
635 | )
636 | }{
637 | my $result;
638 | my $whole_match = $1;
639 | my $alt_text = $2;
640 | my $link_id = lc $3;
641 |
642 | if ($link_id eq "") {
643 | $link_id = lc $alt_text; # for shortcut links like ![this][].
644 | }
645 |
646 | $alt_text =~ s/"/"/g;
647 | if (defined $g_urls{$link_id}) {
648 | my $url = $g_urls{$link_id};
649 | $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid
650 | $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold.
651 | $result = "
? # src url = $3
680 | [ \t]*
681 | ( # $4
682 | (['"]) # quote char = $5
683 | (.*?) # title = $6
684 | \5 # matching quote
685 | [ \t]*
686 | )? # title is optional
687 | \)
688 | )
689 | }{
690 | my $result;
691 | my $whole_match = $1;
692 | my $alt_text = $2;
693 | my $url = $3;
694 | my $title = '';
695 | if (defined($6)) {
696 | $title = $6;
697 | }
698 |
699 | $alt_text =~ s/"/"/g;
700 | $title =~ s/"/"/g;
701 | $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid
702 | $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold.
703 | $result = "
" . _RunSpanGamut($1) . "\n\n";
730 | }egmx;
731 |
732 | $text =~ s{ ^(.+)[ \t]*\n-+[ \t]*\n+ }{
733 | "" . _RunSpanGamut($1) . "
\n\n";
734 | }egmx;
735 |
736 |
737 | # atx-style headers:
738 | # # Header 1
739 | # ## Header 2
740 | # ## Header 2 with closing hashes ##
741 | # ...
742 | # ###### Header 6
743 | #
744 | $text =~ s{
745 | ^(\#{1,6}) # $1 = string of #'s
746 | [ \t]*
747 | (.+?) # $2 = Header text
748 | [ \t]*
749 | \#* # optional closing #'s (not counted)
750 | \n+
751 | }{
752 | my $h_level = length($1);
753 | "" . _RunSpanGamut($2) . " \n\n";
754 | }egmx;
755 |
756 | return $text;
757 | }
758 |
759 |
760 | sub _DoLists {
761 | #
762 | # Form HTML ordered (numbered) and unordered (bulleted) lists.
763 | #
764 | my $text = shift;
765 | my $less_than_tab = $g_tab_width - 1;
766 |
767 | # Re-usable patterns to match list item bullets and number markers:
768 | my $marker_ul = qr/[*+-]/;
769 | my $marker_ol = qr/\d+[.]/;
770 | my $marker_any = qr/(?:$marker_ul|$marker_ol)/;
771 |
772 | # Re-usable pattern to match any entirel ul or ol list:
773 | my $whole_list = qr{
774 | ( # $1 = whole list
775 | ( # $2
776 | [ ]{0,$less_than_tab}
777 | (${marker_any}) # $3 = first list item marker
778 | [ \t]+
779 | )
780 | (?s:.+?)
781 | ( # $4
782 | \z
783 | |
784 | \n{2,}
785 | (?=\S)
786 | (?! # Negative lookahead for another list item marker
787 | [ \t]*
788 | ${marker_any}[ \t]+
789 | )
790 | )
791 | )
792 | }mx;
793 |
794 | # We use a different prefix before nested lists than top-level lists.
795 | # See extended comment in _ProcessListItems().
796 | #
797 | # Note: There's a bit of duplication here. My original implementation
798 | # created a scalar regex pattern as the conditional result of the test on
799 | # $g_list_level, and then only ran the $text =~ s{...}{...}egmx
800 | # substitution once, using the scalar as the pattern. This worked,
801 | # everywhere except when running under MT on my hosting account at Pair
802 | # Networks. There, this caused all rebuilds to be killed by the reaper (or
803 | # perhaps they crashed, but that seems incredibly unlikely given that the
804 | # same script on the same server ran fine *except* under MT. I've spent
805 | # more time trying to figure out why this is happening than I'd like to
806 | # admit. My only guess, backed up by the fact that this workaround works,
807 | # is that Perl optimizes the substition when it can figure out that the
808 | # pattern will never change, and when this optimization isn't on, we run
809 | # afoul of the reaper. Thus, the slightly redundant code to that uses two
810 | # static s/// patterns rather than one conditional pattern.
811 |
812 | if ($g_list_level) {
813 | $text =~ s{
814 | ^
815 | $whole_list
816 | }{
817 | my $list = $1;
818 | my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol";
819 | # Turn double returns into triple returns, so that we can make a
820 | # paragraph for the last item in a list, if necessary:
821 | $list =~ s/\n{2,}/\n\n\n/g;
822 | my $result = _ProcessListItems($list, $marker_any);
823 | $result = "<$list_type>\n" . $result . "$list_type>\n";
824 | $result;
825 | }egmx;
826 | }
827 | else {
828 | $text =~ s{
829 | (?:(?<=\n\n)|\A\n?)
830 | $whole_list
831 | }{
832 | my $list = $1;
833 | my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol";
834 | # Turn double returns into triple returns, so that we can make a
835 | # paragraph for the last item in a list, if necessary:
836 | $list =~ s/\n{2,}/\n\n\n/g;
837 | my $result = _ProcessListItems($list, $marker_any);
838 | $result = "<$list_type>\n" . $result . "$list_type>\n";
839 | $result;
840 | }egmx;
841 | }
842 |
843 |
844 | return $text;
845 | }
846 |
847 |
848 | sub _ProcessListItems {
849 | #
850 | # Process the contents of a single ordered or unordered list, splitting it
851 | # into individual list items.
852 | #
853 |
854 | my $list_str = shift;
855 | my $marker_any = shift;
856 |
857 |
858 | # The $g_list_level global keeps track of when we're inside a list.
859 | # Each time we enter a list, we increment it; when we leave a list,
860 | # we decrement. If it's zero, we're not in a list anymore.
861 | #
862 | # We do this because when we're not inside a list, we want to treat
863 | # something like this:
864 | #
865 | # I recommend upgrading to version
866 | # 8. Oops, now this line is treated
867 | # as a sub-list.
868 | #
869 | # As a single paragraph, despite the fact that the second line starts
870 | # with a digit-period-space sequence.
871 | #
872 | # Whereas when we're inside a list (or sub-list), that line will be
873 | # treated as the start of a sub-list. What a kludge, huh? This is
874 | # an aspect of Markdown's syntax that's hard to parse perfectly
875 | # without resorting to mind-reading. Perhaps the solution is to
876 | # change the syntax rules such that sub-lists must start with a
877 | # starting cardinal number; e.g. "1." or "a.".
878 |
879 | $g_list_level++;
880 |
881 | # trim trailing blank lines:
882 | $list_str =~ s/\n{2,}\z/\n/;
883 |
884 |
885 | $list_str =~ s{
886 | (\n)? # leading line = $1
887 | (^[ \t]*) # leading whitespace = $2
888 | ($marker_any) [ \t]+ # list marker = $3
889 | ((?s:.+?) # list item text = $4
890 | (\n{1,2}))
891 | (?= \n* (\z | \2 ($marker_any) [ \t]+))
892 | }{
893 | my $item = $4;
894 | my $leading_line = $1;
895 | my $leading_space = $2;
896 |
897 | if ($leading_line or ($item =~ m/\n{2,}/)) {
898 | $item = _RunBlockGamut(_Outdent($item));
899 | }
900 | else {
901 | # Recursion for sub-lists:
902 | $item = _DoLists(_Outdent($item));
903 | chomp $item;
904 | $item = _RunSpanGamut($item);
905 | }
906 |
907 | "" . $item . " \n";
908 | }egmx;
909 |
910 | $g_list_level--;
911 | return $list_str;
912 | }
913 |
914 |
915 |
916 | sub _DoCodeBlocks {
917 | #
918 | # Process Markdown `` blocks.
919 | #
920 |
921 | my $text = shift;
922 |
923 | $text =~ s{
924 | (?:\n\n|\A)
925 | ( # $1 = the code block -- one or more lines, starting with a space/tab
926 | (?:
927 | (?:[ ]{$g_tab_width} | \t) # Lines must start with a tab or a tab-width of spaces
928 | .*\n+
929 | )+
930 | )
931 | ((?=^[ ]{0,$g_tab_width}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
932 | }{
933 | my $codeblock = $1;
934 | my $result; # return value
935 |
936 | $codeblock = _EncodeCode(_Outdent($codeblock));
937 | $codeblock = _Detab($codeblock);
938 | $codeblock =~ s/\A\n+//; # trim leading newlines
939 | $codeblock =~ s/\s+\z//; # trim trailing whitespace
940 |
941 | $result = "\n\n" . $codeblock . "\n
\n\n";
942 |
943 | $result;
944 | }egmx;
945 |
946 | return $text;
947 | }
948 |
949 |
950 | sub _DoCodeSpans {
951 | #
952 | # * Backtick quotes are used for spans.
953 | #
954 | # * You can use multiple backticks as the delimiters if you want to
955 | # include literal backticks in the code span. So, this input:
956 | #
957 | # Just type ``foo `bar` baz`` at the prompt.
958 | #
959 | # Will translate to:
960 | #
961 | # Just type foo `bar` baz at the prompt.
962 | #
963 | # There's no arbitrary limit to the number of backticks you
964 | # can use as delimters. If you need three consecutive backticks
965 | # in your code, use four for delimiters, etc.
966 | #
967 | # * You can use spaces to get literal backticks at the edges:
968 | #
969 | # ... type `` `bar` `` ...
970 | #
971 | # Turns to:
972 | #
973 | # ... type `bar` ...
974 | #
975 |
976 | my $text = shift;
977 |
978 | $text =~ s@
979 | (`+) # $1 = Opening run of `
980 | (.+?) # $2 = The code block
981 | (?$c";
990 | @egsx;
991 |
992 | return $text;
993 | }
994 |
995 |
996 | sub _EncodeCode {
997 | #
998 | # Encode/escape certain characters inside Markdown code runs.
999 | # The point is that in code, these characters are literals,
1000 | # and lose their special Markdown meanings.
1001 | #
1002 | local $_ = shift;
1003 |
1004 | # Encode all ampersands; HTML entities are not
1005 | # entities within a Markdown code span.
1006 | s/&/&/g;
1007 |
1008 | # Encode $'s, but only if we're running under Blosxom.
1009 | # (Blosxom interpolates Perl variables in article bodies.)
1010 | {
1011 | no warnings 'once';
1012 | if (defined($blosxom::version)) {
1013 | s/\$/$/g;
1014 | }
1015 | }
1016 |
1017 |
1018 | # Do the angle bracket song and dance:
1019 | s! < !<!gx;
1020 | s! > !>!gx;
1021 |
1022 | # Now, escape characters that are magic in Markdown:
1023 | s! \* !$g_escape_table{'*'}!gx;
1024 | s! _ !$g_escape_table{'_'}!gx;
1025 | s! { !$g_escape_table{'{'}!gx;
1026 | s! } !$g_escape_table{'}'}!gx;
1027 | s! \[ !$g_escape_table{'['}!gx;
1028 | s! \] !$g_escape_table{']'}!gx;
1029 | s! \\ !$g_escape_table{'\\'}!gx;
1030 |
1031 | return $_;
1032 | }
1033 |
1034 |
1035 | sub _DoItalicsAndBold {
1036 | my $text = shift;
1037 |
1038 | # must go first:
1039 | $text =~ s{ (\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1 }
1040 | {$2}gsx;
1041 |
1042 | $text =~ s{ (\*|_) (?=\S) (.+?) (?<=\S) \1 }
1043 | {$2}gsx;
1044 |
1045 | return $text;
1046 | }
1047 |
1048 |
1049 | sub _DoBlockQuotes {
1050 | my $text = shift;
1051 |
1052 | $text =~ s{
1053 | ( # Wrap whole match in $1
1054 | (
1055 | ^[ \t]*>[ \t]? # '>' at the start of a line
1056 | .+\n # rest of the first line
1057 | (.+\n)* # subsequent consecutive lines
1058 | \n* # blanks
1059 | )+
1060 | )
1061 | }{
1062 | my $bq = $1;
1063 | $bq =~ s/^[ \t]*>[ \t]?//gm; # trim one level of quoting
1064 | $bq =~ s/^[ \t]+$//mg; # trim whitespace-only lines
1065 | $bq = _RunBlockGamut($bq); # recurse
1066 |
1067 | $bq =~ s/^/ /g;
1068 | # These leading spaces screw with content, so we need to fix that:
1069 | $bq =~ s{
1070 | (\s*.+?
)
1071 | }{
1072 | my $pre = $1;
1073 | $pre =~ s/^ //mg;
1074 | $pre;
1075 | }egsx;
1076 |
1077 | "\n$bq\n
\n\n";
1078 | }egmx;
1079 |
1080 |
1081 | return $text;
1082 | }
1083 |
1084 |
1085 | sub _FormParagraphs {
1086 | #
1087 | # Params:
1088 | # $text - string to process with html tags
1089 | #
1090 | my $text = shift;
1091 |
1092 | # Strip leading and trailing lines:
1093 | $text =~ s/\A\n+//;
1094 | $text =~ s/\n+\z//;
1095 |
1096 | my @grafs = split(/\n{2,}/, $text);
1097 |
1098 | #
1099 | # Wrap
tags.
1100 | #
1101 | foreach (@grafs) {
1102 | unless (defined( $g_html_blocks{$_} )) {
1103 | $_ = _RunSpanGamut($_);
1104 | s/^([ \t]*)/
/;
1105 | $_ .= "
";
1106 | }
1107 | }
1108 |
1109 | #
1110 | # Unhashify HTML blocks
1111 | #
1112 | foreach (@grafs) {
1113 | if (defined( $g_html_blocks{$_} )) {
1114 | $_ = $g_html_blocks{$_};
1115 | }
1116 | }
1117 |
1118 | return join "\n\n", @grafs;
1119 | }
1120 |
1121 |
1122 | sub _EncodeAmpsAndAngles {
1123 | # Smart processing for ampersands and angle brackets that need to be encoded.
1124 |
1125 | my $text = shift;
1126 |
1127 | # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
1128 | # http://bumppo.net/projects/amputator/
1129 | $text =~ s/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/&/g;
1130 |
1131 | # Encode naked <'s
1132 | $text =~ s{<(?![a-z/?\$!])}{<}gi;
1133 |
1134 | return $text;
1135 | }
1136 |
1137 |
1138 | sub _EncodeBackslashEscapes {
1139 | #
1140 | # Parameter: String.
1141 | # Returns: The string, with after processing the following backslash
1142 | # escape sequences.
1143 | #
1144 | local $_ = shift;
1145 |
1146 | s! \\\\ !$g_escape_table{'\\'}!gx; # Must process escaped backslashes first.
1147 | s! \\` !$g_escape_table{'`'}!gx;
1148 | s! \\\* !$g_escape_table{'*'}!gx;
1149 | s! \\_ !$g_escape_table{'_'}!gx;
1150 | s! \\\{ !$g_escape_table{'{'}!gx;
1151 | s! \\\} !$g_escape_table{'}'}!gx;
1152 | s! \\\[ !$g_escape_table{'['}!gx;
1153 | s! \\\] !$g_escape_table{']'}!gx;
1154 | s! \\\( !$g_escape_table{'('}!gx;
1155 | s! \\\) !$g_escape_table{')'}!gx;
1156 | s! \\> !$g_escape_table{'>'}!gx;
1157 | s! \\\# !$g_escape_table{'#'}!gx;
1158 | s! \\\+ !$g_escape_table{'+'}!gx;
1159 | s! \\\- !$g_escape_table{'-'}!gx;
1160 | s! \\\. !$g_escape_table{'.'}!gx;
1161 | s{ \\! }{$g_escape_table{'!'}}gx;
1162 |
1163 | return $_;
1164 | }
1165 |
1166 |
1167 | sub _DoAutoLinks {
1168 | my $text = shift;
1169 |
1170 | $text =~ s{<((https?|ftp):[^'">\s]+)>}{$1}gi;
1171 |
1172 | # Email addresses:
1173 | $text =~ s{
1174 | <
1175 | (?:mailto:)?
1176 | (
1177 | [-.\w]+
1178 | \@
1179 | [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
1180 | )
1181 | >
1182 | }{
1183 | _EncodeEmailAddress( _UnescapeSpecialChars($1) );
1184 | }egix;
1185 |
1186 | return $text;
1187 | }
1188 |
1189 |
1190 | sub _EncodeEmailAddress {
1191 | #
1192 | # Input: an email address, e.g. "foo@example.com"
1193 | #
1194 | # Output: the email address as a mailto link, with each character
1195 | # of the address encoded as either a decimal or hex entity, in
1196 | # the hopes of foiling most address harvesting spam bots. E.g.:
1197 | #
1198 | # foo
1200 | # @example.com
1201 | #
1202 | # Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
1203 | # mailing list:
1204 | #
1205 |
1206 | my $addr = shift;
1207 |
1208 | srand;
1209 | my @encode = (
1210 | sub { '' . ord(shift) . ';' },
1211 | sub { '' . sprintf( "%X", ord(shift) ) . ';' },
1212 | sub { shift },
1213 | );
1214 |
1215 | $addr = "mailto:" . $addr;
1216 |
1217 | $addr =~ s{(.)}{
1218 | my $char = $1;
1219 | if ( $char eq '@' ) {
1220 | # this *must* be encoded. I insist.
1221 | $char = $encode[int rand 1]->($char);
1222 | } elsif ( $char ne ':' ) {
1223 | # leave ':' alone (to spot mailto: later)
1224 | my $r = rand;
1225 | # roughly 10% raw, 45% hex, 45% dec
1226 | $char = (
1227 | $r > .9 ? $encode[2]->($char) :
1228 | $r < .45 ? $encode[1]->($char) :
1229 | $encode[0]->($char)
1230 | );
1231 | }
1232 | $char;
1233 | }gex;
1234 |
1235 | $addr = qq{$addr};
1236 | $addr =~ s{">.+?:}{">}; # strip the mailto: from the visible part
1237 |
1238 | return $addr;
1239 | }
1240 |
1241 |
1242 | sub _UnescapeSpecialChars {
1243 | #
1244 | # Swap back in all the special characters we've hidden.
1245 | #
1246 | my $text = shift;
1247 |
1248 | while( my($char, $hash) = each(%g_escape_table) ) {
1249 | $text =~ s/$hash/$char/g;
1250 | }
1251 | return $text;
1252 | }
1253 |
1254 |
1255 | sub _TokenizeHTML {
1256 | #
1257 | # Parameter: String containing HTML markup.
1258 | # Returns: Reference to an array of the tokens comprising the input
1259 | # string. Each token is either a tag (possibly with nested,
1260 | # tags contained therein, such as , or a
1261 | # run of text between tags. Each element of the array is a
1262 | # two-element array; the first is either 'tag' or 'text';
1263 | # the second is the actual value.
1264 | #
1265 | #
1266 | # Derived from the _tokenize() subroutine from Brad Choate's MTRegex plugin.
1267 | #
1268 | #
1269 |
1270 | my $str = shift;
1271 | my $pos = 0;
1272 | my $len = length $str;
1273 | my @tokens;
1274 |
1275 | my $depth = 6;
1276 | my $nested_tags = join('|', ('(?:<[a-z/!$](?:[^<>]') x $depth) . (')*>)' x $depth);
1277 | my $match = qr/(?s: ) | # comment
1278 | (?s: <\? .*? \?> ) | # processing instruction
1279 | $nested_tags/ix; # nested tags
1280 |
1281 | while ($str =~ m/($match)/g) {
1282 | my $whole_tag = $1;
1283 | my $sec_start = pos $str;
1284 | my $tag_start = $sec_start - length $whole_tag;
1285 | if ($pos < $tag_start) {
1286 | push @tokens, ['text', substr($str, $pos, $tag_start - $pos)];
1287 | }
1288 | push @tokens, ['tag', $whole_tag];
1289 | $pos = pos $str;
1290 | }
1291 | push @tokens, ['text', substr($str, $pos, $len - $pos)] if $pos < $len;
1292 | \@tokens;
1293 | }
1294 |
1295 |
1296 | sub _Outdent {
1297 | #
1298 | # Remove one level of line-leading tabs or spaces
1299 | #
1300 | my $text = shift;
1301 |
1302 | $text =~ s/^(\t|[ ]{1,$g_tab_width})//gm;
1303 | return $text;
1304 | }
1305 |
1306 |
1307 | sub _Detab {
1308 | #
1309 | # Cribbed from a post by Bart Lateur:
1310 | #
1311 | #
1312 | my $text = shift;
1313 |
1314 | $text =~ s{(.*?)\t}{$1.(' ' x ($g_tab_width - length($1) % $g_tab_width))}ge;
1315 | return $text;
1316 | }
1317 |
1318 |
1319 | 1;
1320 |
1321 | __END__
1322 |
1323 |
1324 | =pod
1325 |
1326 | =head1 NAME
1327 |
1328 | B
1329 |
1330 |
1331 | =head1 SYNOPSIS
1332 |
1333 | B [ B<--html4tags> ] [ B<--version> ] [ B<-shortversion> ]
1334 | [ I ... ]
1335 |
1336 |
1337 | =head1 DESCRIPTION
1338 |
1339 | Markdown is a text-to-HTML filter; it translates an easy-to-read /
1340 | easy-to-write structured text format into HTML. Markdown's text format
1341 | is most similar to that of plain text email, and supports features such
1342 | as headers, *emphasis*, code blocks, blockquotes, and links.
1343 |
1344 | Markdown's syntax is designed not as a generic markup language, but
1345 | specifically to serve as a front-end to (X)HTML. You can use span-level
1346 | HTML tags anywhere in a Markdown document, and you can use block level
1347 | HTML tags (like and as well).
1348 |
1349 | For more information about Markdown's syntax, see:
1350 |
1351 | http://daringfireball.net/projects/markdown/
1352 |
1353 |
1354 | =head1 OPTIONS
1355 |
1356 | Use "--" to end switch parsing. For example, to open a file named "-z", use:
1357 |
1358 | Markdown.pl -- -z
1359 |
1360 | =over 4
1361 |
1362 |
1363 | =item B<--html4tags>
1364 |
1365 | Use HTML 4 style for empty element tags, e.g.:
1366 |
1367 |
1368 |
1369 | instead of Markdown's default XHTML style tags, e.g.:
1370 |
1371 |
1372 |
1373 |
1374 | =item B<-v>, B<--version>
1375 |
1376 | Display Markdown's version number and copyright information.
1377 |
1378 |
1379 | =item B<-s>, B<--shortversion>
1380 |
1381 | Display the short-form version number.
1382 |
1383 |
1384 | =back
1385 |
1386 |
1387 |
1388 | =head1 BUGS
1389 |
1390 | To file bug reports or feature requests (other than topics listed in the
1391 | Caveats section above) please send email to:
1392 |
1393 | support@daringfireball.net
1394 |
1395 | Please include with your report: (1) the example input; (2) the output
1396 | you expected; (3) the output Markdown actually produced.
1397 |
1398 |
1399 | =head1 VERSION HISTORY
1400 |
1401 | See the readme file for detailed release notes for this version.
1402 |
1403 | 1.0.1 - 14 Dec 2004
1404 |
1405 | 1.0 - 28 Aug 2004
1406 |
1407 |
1408 | =head1 AUTHOR
1409 |
1410 | John Gruber
1411 | http://daringfireball.net
1412 |
1413 | PHP port and other contributions by Michel Fortin
1414 | http://michelf.com
1415 |
1416 |
1417 | =head1 COPYRIGHT AND LICENSE
1418 |
1419 | Copyright (c) 2003-2004 John Gruber
1420 |
1421 | All rights reserved.
1422 |
1423 | Redistribution and use in source and binary forms, with or without
1424 | modification, are permitted provided that the following conditions are
1425 | met:
1426 |
1427 | * Redistributions of source code must retain the above copyright notice,
1428 | this list of conditions and the following disclaimer.
1429 |
1430 | * Redistributions in binary form must reproduce the above copyright
1431 | notice, this list of conditions and the following disclaimer in the
1432 | documentation and/or other materials provided with the distribution.
1433 |
1434 | * Neither the name "Markdown" nor the names of its contributors may
1435 | be used to endorse or promote products derived from this software
1436 | without specific prior written permission.
1437 |
1438 | This software is provided by the copyright holders and contributors "as
1439 | is" and any express or implied warranties, including, but not limited
1440 | to, the implied warranties of merchantability and fitness for a
1441 | particular purpose are disclaimed. In no event shall the copyright owner
1442 | or contributors be liable for any direct, indirect, incidental, special,
1443 | exemplary, or consequential damages (including, but not limited to,
1444 | procurement of substitute goods or services; loss of use, data, or
1445 | profits; or business interruption) however caused and on any theory of
1446 | liability, whether in contract, strict liability, or tort (including
1447 | negligence or otherwise) arising in any way out of the use of this
1448 | software, even if advised of the possibility of such damage.
1449 |
1450 | =cut
1451 |
--------------------------------------------------------------------------------