├── LICENSE ├── Makefile ├── README.md ├── alltests.md └── src ├── main.c ├── pdf.c └── pdf.h /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015, John MacFarlane 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following 13 | disclaimer in the documentation and/or other materials provided 14 | with the distribution. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 | 28 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CCFLAGS = -g 2 | UNAME_S := $(shell uname -s) 3 | ifeq ($(UNAME_S),Linux) 4 | CCFLAGS += -D _LINUX 5 | endif 6 | ifeq ($(UNAME_S),Darwin) 7 | CCFLAGS += -D _OSX 8 | endif 9 | 10 | .PHONY: all clean leakcheck 11 | 12 | all: cmarkpdf 13 | 14 | %.o: src/%.c 15 | $(CC) -Wall -c $< -o $@ $(CCFLAGS) 16 | 17 | cmarkpdf: main.o pdf.o 18 | $(CC) $^ -o $@ $(CCFLAGS) -lhpdf -lcmark 19 | 20 | leakcheck: 21 | valgrind -q --leak-check=full --dsymutil=yes --error-exitcode=1 ./cmarkpdf -o leakcheck.pdf alltests.md 22 | 23 | clean: 24 | -rm *.o cmarkpdf 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | cmarkpdf 2 | ======== 3 | 4 | This is an experimental native PDF renderer for 5 | [cmark](https://github.com/jgm/cmark), using 6 | [libharu](https://github.com/libharu/libharu). 7 | 8 | It links dynamically against both libcmark and 9 | libhpdf (libharu), which must be installed. 10 | A recent version of libhpdf that supports UTF-8 11 | encoding is needed. 12 | 13 | To build on Linux or OSX, `make`. 14 | 15 | To use: 16 | 17 | ./cmarkpdf --smart -o output.pdf input.txt 18 | 19 | Note that for now, paths to fonts are hardcoded in `src/pdf.c` 20 | and may need to be adjusted if your system puts fonts 21 | in a different place or has different fonts. 22 | 23 | Roadmap 24 | ------- 25 | 26 | - [x] Line wrapping and justification, using greedy 27 | algorithm. (In the future we might explore using 28 | Knuth-Plass.) 29 | - [x] List items (currently all treated as bulleted). 30 | - [x] Bullet lists should use different bullets at different 31 | indent level. 
32 | - [x] Proper list markers for ordered lists 33 | - [x] Code spans 34 | - [x] Code blocks 35 | - [x] Block quotes 36 | - [x] Hrules 37 | - [x] Headers 38 | - [x] Strong 39 | - [x] Emph 40 | - [x] Links 41 | - [ ] Images: support jpeg too 42 | - [ ] Adjust vert space before rendering line so images get right 43 | space 44 | - [ ] Better error handling (check status of every function) 45 | - [ ] Customizability (e.g. selecting fonts, margins, sizes) 46 | - [ ] Ensure that PDF is searchable and copy-pasteable. 47 | Currently the PDFs work well in some viewers (e.g. Chrome's or 48 | Adobe Reader) but not in others. In Ubuntu Evince the text is not 49 | selectable. In OSX Mavericks Preview, the text can be selected 50 | and copied, but it is garbled when pasted. See 51 | https://groups.google.com/forum/#!searchin/libharu/copy$20paste/libharu/YzXoH_K3OAI/YNCsn6XXF-gJ, 52 | http://superuser.com/questions/137824/pdf-has-garbled-text-when-copy-pasting, 53 | and comments in the code. 54 | -------------------------------------------------------------------------------- /alltests.md: -------------------------------------------------------------------------------- 1 | foo baz bim 2 | 3 | foo baz bim 4 | 5 | a a 6 | ὐ a 7 | 8 | - foo 9 | 10 | bar 11 | 12 | > foo bar 13 | 14 | - `one 15 | - two` 16 | 17 | *** 18 | --- 19 | ___ 20 | 21 | +++ 22 | 23 | === 24 | 25 | -- 26 | ** 27 | __ 28 | 29 | *** 30 | *** 31 | *** 32 | 33 | *** 34 | 35 | Foo 36 | *** 37 | 38 | _____________________________________ 39 | 40 | - - - 41 | 42 | ** * ** * ** * ** 43 | 44 | - - - - 45 | 46 | - - - - 47 | 48 | _ _ _ _ a 49 | 50 | a------ 51 | 52 | ---a--- 53 | 54 | *-* 55 | 56 | - foo 57 | *** 58 | - bar 59 | 60 | Foo 61 | *** 62 | bar 63 | 64 | Foo 65 | --- 66 | bar 67 | 68 | * Foo 69 | * * * 70 | * Bar 71 | 72 | - Foo 73 | - * * * 74 | 75 | # foo 76 | ## foo 77 | ### foo 78 | #### foo 79 | ##### foo 80 | ###### foo 81 | 82 | ####### foo 83 | 84 | #5 bolt 85 | 86 | #foobar 87 | 88 | \## foo 89 | 90 
| # foo *bar* \*baz\* 91 | 92 | # foo 93 | 94 | ### foo 95 | ## foo 96 | # foo 97 | 98 | # foo 99 | 100 | foo 101 | # bar 102 | 103 | ## foo ## 104 | ### bar ### 105 | 106 | # foo ################################## 107 | ##### foo ## 108 | 109 | ### foo ### 110 | 111 | ### foo ### b 112 | 113 | # foo# 114 | 115 | ### foo \### 116 | ## foo #\## 117 | # foo \# 118 | 119 | **** 120 | ## foo 121 | **** 122 | 123 | Foo bar 124 | # baz 125 | Bar foo 126 | 127 | ## 128 | # 129 | ### ### 130 | 131 | Foo *bar* 132 | ========= 133 | 134 | Foo *bar* 135 | --------- 136 | 137 | Foo 138 | ------------------------- 139 | 140 | Foo 141 | = 142 | 143 | Foo 144 | --- 145 | 146 | Foo 147 | ----- 148 | 149 | Foo 150 | === 151 | 152 | Foo 153 | --- 154 | 155 | Foo 156 | --- 157 | 158 | Foo 159 | ---- 160 | 161 | Foo 162 | --- 163 | 164 | Foo 165 | = = 166 | 167 | Foo 168 | --- - 169 | 170 | Foo 171 | ----- 172 | 173 | Foo\ 174 | ---- 175 | 176 | `Foo 177 | ---- 178 | ` 179 | 180 | 183 | 184 | > Foo 185 | --- 186 | 187 | - Foo 188 | --- 189 | 190 | Foo 191 | Bar 192 | --- 193 | 194 | Foo 195 | Bar 196 | === 197 | 198 | --- 199 | Foo 200 | --- 201 | Bar 202 | --- 203 | Baz 204 | 205 | 206 | ==== 207 | 208 | --- 209 | --- 210 | 211 | - foo 212 | ----- 213 | 214 | foo 215 | --- 216 | 217 | > foo 218 | ----- 219 | 220 | \> foo 221 | ------ 222 | 223 | a simple 224 | indented code block 225 | 226 | - foo 227 | 228 | bar 229 | 230 | 1. 
foo 231 | 232 | - bar 233 | 234 | 235 | *hi* 236 | 237 | - one 238 | 239 | chunk1 240 | 241 | chunk2 242 | 243 | 244 | 245 | chunk3 246 | 247 | chunk1 248 | 249 | chunk2 250 | 251 | Foo 252 | bar 253 | 254 | 255 | foo 256 | bar 257 | 258 | # Header 259 | foo 260 | Header 261 | ------ 262 | foo 263 | ---- 264 | 265 | foo 266 | bar 267 | 268 | 269 | 270 | foo 271 | 272 | 273 | 274 | foo 275 | 276 | ``` 277 | < 278 | > 279 | ``` 280 | 281 | ~~~ 282 | < 283 | > 284 | ~~~ 285 | 286 | ``` 287 | aaa 288 | ~~~ 289 | ``` 290 | 291 | ~~~ 292 | aaa 293 | ``` 294 | ~~~ 295 | 296 | ```` 297 | aaa 298 | ``` 299 | `````` 300 | 301 | ~~~~ 302 | aaa 303 | ~~~ 304 | ~~~~ 305 | 306 | ``` 307 | 308 | ````` 309 | 310 | ``` 311 | aaa 312 | 313 | > ``` 314 | > aaa 315 | 316 | bbb 317 | 318 | ``` 319 | 320 | 321 | ``` 322 | 323 | ``` 324 | ``` 325 | 326 | ``` 327 | aaa 328 | aaa 329 | ``` 330 | 331 | ``` 332 | aaa 333 | aaa 334 | aaa 335 | ``` 336 | 337 | ``` 338 | aaa 339 | aaa 340 | aaa 341 | ``` 342 | 343 | ``` 344 | aaa 345 | ``` 346 | 347 | ``` 348 | aaa 349 | ``` 350 | 351 | ``` 352 | aaa 353 | ``` 354 | 355 | ``` 356 | aaa 357 | ``` 358 | 359 | ``` ``` 360 | aaa 361 | 362 | ~~~~~~ 363 | aaa 364 | ~~~ ~~ 365 | 366 | foo 367 | ``` 368 | bar 369 | ``` 370 | baz 371 | 372 | foo 373 | --- 374 | ~~~ 375 | bar 376 | ~~~ 377 | # baz 378 | 379 | ```ruby 380 | def foo(x) 381 | return 3 382 | end 383 | ``` 384 | 385 | ~~~~ ruby startline=3 $%@#$ 386 | def foo(x) 387 | return 3 388 | end 389 | ~~~~~~~ 390 | 391 | ````; 392 | ```` 393 | 394 | ``` aa ``` 395 | foo 396 | 397 | ``` 398 | ``` aaa 399 | ``` 400 | 401 | 402 | 403 | 406 | 407 |
404 | hi 405 |
408 | 409 | okay. 410 | 411 |
412 | *hello* 413 | 414 | 415 |
416 | *foo* 417 | 418 |
419 | 420 | *Markdown* 421 | 422 |
423 | 424 |
426 |
427 | 428 |
430 |
431 | 432 |
433 | *foo* 434 | 435 | *bar* 436 | 437 |
*foo*
447 | 448 |
449 | foo 450 |
451 | 452 |
453 | ``` c 454 | int x = 33; 455 | ``` 456 | 457 | 458 | *bar* 459 | 460 | 461 | 462 | *bar* 463 | 464 | 465 | 466 | *bar* 467 | 468 | 469 | 470 | *foo* 471 | 472 | 473 | 474 | 475 | *foo* 476 | 477 | 478 | 479 | *foo* 480 | 481 |

 482 | import Text.HTML.TagSoup
 483 | 
 484 | main :: IO ()
 485 | main = print $ parseTags tags
 486 | 
487 | 488 | 493 | 494 | 500 | 501 | 515 | *foo* 516 | 517 | *bar* 518 | *baz* 519 | 520 | 1. *bar* 523 | 524 | 528 | 529 | '; 532 | 533 | ?> 534 | 535 | 536 | 537 | 549 | 550 | 551 | 552 | 553 | 554 |
555 | 556 |
557 | 558 | Foo 559 |
560 | bar 561 |
562 | 563 |
564 | bar 565 |
566 | *foo* 567 | 568 | Foo 569 | 570 | baz 571 | 572 |
573 | 574 | *Emphasized* text. 575 | 576 |
577 | 578 |
579 | *Emphasized* text. 580 |
581 | 582 | 583 | 584 | 585 | 586 | 589 | 590 | 591 | 592 |
587 | Hi 588 |
593 | 594 | 595 | 596 | 597 | 598 | 601 | 602 | 603 | 604 |
599 | Hi 600 |
605 | 606 | [foo]: /url "title" 607 | 608 | [foo] 609 | 610 | [foo]: 611 | /url 612 | 'the title' 613 | 614 | [foo] 615 | 616 | [Foo*bar\]]:my_(url) 'title (with parens)' 617 | 618 | [Foo*bar\]] 619 | 620 | [Foo bar]: 621 | 622 | 'title' 623 | 624 | [Foo bar] 625 | 626 | [foo]: /url ' 627 | title 628 | line1 629 | line2 630 | ' 631 | 632 | [foo] 633 | 634 | [foo]: /url 'title 635 | 636 | with blank line' 637 | 638 | [foo] 639 | 640 | [foo]: 641 | /url 642 | 643 | [foo] 644 | 645 | [foo]: 646 | 647 | [foo] 648 | 649 | [foo]: /url\bar\*baz "foo\"bar\baz" 650 | 651 | [foo] 652 | 653 | [foo] 654 | 655 | [foo]: url 656 | 657 | [foo] 658 | 659 | [foo]: first 660 | [foo]: second 661 | 662 | [FOO]: /url 663 | 664 | [Foo] 665 | 666 | [ΑΓΩ]: /φου 667 | 668 | [αγω] 669 | 670 | [foo]: /url 671 | 672 | [ 673 | foo 674 | ]: /url 675 | bar 676 | 677 | [foo]: /url "title" ok 678 | 679 | [foo]: /url 680 | "title" ok 681 | 682 | [foo]: /url "title" 683 | 684 | [foo] 685 | 686 | ``` 687 | [foo]: /url 688 | ``` 689 | 690 | [foo] 691 | 692 | Foo 693 | [bar]: /baz 694 | 695 | [bar] 696 | 697 | # [Foo] 698 | [foo]: /url 699 | > bar 700 | 701 | [foo]: /foo-url "foo" 702 | [bar]: /bar-url 703 | "bar" 704 | [baz]: /baz-url 705 | 706 | [foo], 707 | [bar], 708 | [baz] 709 | 710 | [foo] 711 | 712 | > [foo]: /url 713 | 714 | aaa 715 | 716 | bbb 717 | 718 | aaa 719 | bbb 720 | 721 | ccc 722 | ddd 723 | 724 | aaa 725 | 726 | 727 | bbb 728 | 729 | aaa 730 | bbb 731 | 732 | aaa 733 | bbb 734 | ccc 735 | 736 | aaa 737 | bbb 738 | 739 | aaa 740 | bbb 741 | 742 | aaa 743 | bbb 744 | 745 | 746 | 747 | aaa 748 | 749 | 750 | # aaa 751 | 752 | 753 | 754 | > # Foo 755 | > bar 756 | > baz 757 | 758 | ># Foo 759 | >bar 760 | > baz 761 | 762 | > # Foo 763 | > bar 764 | > baz 765 | 766 | > # Foo 767 | > bar 768 | > baz 769 | 770 | > # Foo 771 | > bar 772 | baz 773 | 774 | > bar 775 | baz 776 | > foo 777 | 778 | > foo 779 | --- 780 | 781 | > - foo 782 | - bar 783 | 784 | > foo 785 | bar 786 | 787 | > ``` 788 | 
foo 789 | ``` 790 | 791 | > 792 | 793 | > 794 | > 795 | > 796 | 797 | > 798 | > foo 799 | > 800 | 801 | > foo 802 | 803 | > bar 804 | 805 | > foo 806 | > bar 807 | 808 | > foo 809 | > 810 | > bar 811 | 812 | foo 813 | > bar 814 | 815 | > aaa 816 | *** 817 | > bbb 818 | 819 | > bar 820 | baz 821 | 822 | > bar 823 | 824 | baz 825 | 826 | > bar 827 | > 828 | baz 829 | 830 | > > > foo 831 | bar 832 | 833 | >>> foo 834 | > bar 835 | >>baz 836 | 837 | > code 838 | 839 | > not code 840 | 841 | A paragraph 842 | with two lines. 843 | 844 | indented code 845 | 846 | > A block quote. 847 | 848 | 1. A paragraph 849 | with two lines. 850 | 851 | indented code 852 | 853 | > A block quote. 854 | 855 | - one 856 | 857 | two 858 | 859 | - one 860 | 861 | two 862 | 863 | - one 864 | 865 | two 866 | 867 | - one 868 | 869 | two 870 | 871 | > > 1. one 872 | >> 873 | >> two 874 | 875 | >>- one 876 | >> 877 | > > two 878 | 879 | -one 880 | 881 | 2.two 882 | 883 | - foo 884 | 885 | bar 886 | 887 | - foo 888 | 889 | 890 | bar 891 | 892 | - ``` 893 | foo 894 | 895 | 896 | bar 897 | ``` 898 | 899 | - baz 900 | 901 | + ``` 902 | foo 903 | 904 | 905 | bar 906 | ``` 907 | 908 | 1. foo 909 | 910 | ``` 911 | bar 912 | ``` 913 | 914 | baz 915 | 916 | > bam 917 | 918 | 123456789. ok 919 | 920 | 1234567890. not ok 921 | 922 | 0. ok 923 | 924 | 003. ok 925 | 926 | -1. not ok 927 | 928 | - foo 929 | 930 | bar 931 | 932 | 10. foo 933 | 934 | bar 935 | 936 | indented code 937 | 938 | paragraph 939 | 940 | more code 941 | 942 | 1. indented code 943 | 944 | paragraph 945 | 946 | more code 947 | 948 | 1. indented code 949 | 950 | paragraph 951 | 952 | more code 953 | 954 | foo 955 | 956 | bar 957 | 958 | - foo 959 | 960 | bar 961 | 962 | - foo 963 | 964 | bar 965 | 966 | - 967 | foo 968 | - 969 | ``` 970 | bar 971 | ``` 972 | - 973 | baz 974 | 975 | - foo 976 | - 977 | - bar 978 | 979 | - foo 980 | - 981 | - bar 982 | 983 | 1. foo 984 | 2. 985 | 3. bar 986 | 987 | * 988 | 989 | 1. 
A paragraph 990 | with two lines. 991 | 992 | indented code 993 | 994 | > A block quote. 995 | 996 | 1. A paragraph 997 | with two lines. 998 | 999 | indented code 1000 | 1001 | > A block quote. 1002 | 1003 | 1. A paragraph 1004 | with two lines. 1005 | 1006 | indented code 1007 | 1008 | > A block quote. 1009 | 1010 | 1. A paragraph 1011 | with two lines. 1012 | 1013 | indented code 1014 | 1015 | > A block quote. 1016 | 1017 | 1. A paragraph 1018 | with two lines. 1019 | 1020 | indented code 1021 | 1022 | > A block quote. 1023 | 1024 | 1. A paragraph 1025 | with two lines. 1026 | 1027 | > 1. > Blockquote 1028 | continued here. 1029 | 1030 | > 1. > Blockquote 1031 | > continued here. 1032 | 1033 | - foo 1034 | - bar 1035 | - baz 1036 | 1037 | - foo 1038 | - bar 1039 | - baz 1040 | 1041 | 10) foo 1042 | - bar 1043 | 1044 | 10) foo 1045 | - bar 1046 | 1047 | - - foo 1048 | 1049 | 1. - 2. foo 1050 | 1051 | - # Foo 1052 | - Bar 1053 | --- 1054 | baz 1055 | 1056 | - foo 1057 | - bar 1058 | + baz 1059 | 1060 | 1. foo 1061 | 2. bar 1062 | 3) baz 1063 | 1064 | Foo 1065 | - bar 1066 | - baz 1067 | 1068 | The number of windows in my house is 1069 | 14. The number of doors is 6. 1070 | 1071 | - foo 1072 | 1073 | - bar 1074 | 1075 | 1076 | - baz 1077 | 1078 | - foo 1079 | 1080 | 1081 | bar 1082 | - baz 1083 | 1084 | - foo 1085 | - bar 1086 | - baz 1087 | 1088 | 1089 | bim 1090 | 1091 | - foo 1092 | - bar 1093 | 1094 | 1095 | - baz 1096 | - bim 1097 | 1098 | - foo 1099 | 1100 | notcode 1101 | 1102 | - foo 1103 | 1104 | 1105 | code 1106 | 1107 | - a 1108 | - b 1109 | - c 1110 | - d 1111 | - e 1112 | - f 1113 | - g 1114 | - h 1115 | - i 1116 | 1117 | 1. a 1118 | 1119 | 2. b 1120 | 1121 | 3. 
c 1122 | 1123 | - a 1124 | - b 1125 | 1126 | - c 1127 | 1128 | * a 1129 | * 1130 | 1131 | * c 1132 | 1133 | - a 1134 | - b 1135 | 1136 | c 1137 | - d 1138 | 1139 | - a 1140 | - b 1141 | 1142 | [ref]: /url 1143 | - d 1144 | 1145 | - a 1146 | - ``` 1147 | b 1148 | 1149 | 1150 | ``` 1151 | - c 1152 | 1153 | - a 1154 | - b 1155 | 1156 | c 1157 | - d 1158 | 1159 | * a 1160 | > b 1161 | > 1162 | * c 1163 | 1164 | - a 1165 | > b 1166 | ``` 1167 | c 1168 | ``` 1169 | - d 1170 | 1171 | - a 1172 | 1173 | - a 1174 | - b 1175 | 1176 | 1. ``` 1177 | foo 1178 | ``` 1179 | 1180 | bar 1181 | 1182 | * foo 1183 | * bar 1184 | 1185 | baz 1186 | 1187 | - a 1188 | - b 1189 | - c 1190 | 1191 | - d 1192 | - e 1193 | - f 1194 | 1195 | `hi`lo` 1196 | 1197 | \!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~ 1198 | 1199 | \ \A\a\ \3\φ\« 1200 | 1201 | \*not emphasized* 1202 | \
not a tag 1203 | \[not a link](/foo) 1204 | \`not code` 1205 | 1\. not a list 1206 | \* not a list 1207 | \# not a header 1208 | \[foo]: /url "not a reference" 1209 | 1210 | \\*emphasis* 1211 | 1212 | foo\ 1213 | bar 1214 | 1215 | `` \[\` `` 1216 | 1217 | \[\] 1218 | 1219 | ~~~ 1220 | \[\] 1221 | ~~~ 1222 | 1223 | 1224 | 1225 |
1226 | 1227 | [foo](/bar\* "ti\*tle") 1228 | 1229 | [foo] 1230 | 1231 | [foo]: /bar\* "ti\*tle" 1232 | 1233 | ``` foo\+bar 1234 | foo 1235 | ``` 1236 | 1237 |   & © Æ Ď 1238 | ¾ ℋ ⅆ 1239 | ∲ ≧̸ 1240 | 1241 | # Ӓ Ϡ � � 1242 | 1243 | " ആ ಫ 1244 | 1245 |   &x; &#; &#x; &ThisIsWayTooLongToBeAnEntityIsntIt; &hi?; 1246 | 1247 | © 1248 | 1249 | &MadeUpEntity; 1250 | 1251 | 1252 | 1253 | [foo](/föö "föö") 1254 | 1255 | [foo] 1256 | 1257 | [foo]: /föö "föö" 1258 | 1259 | ``` föö 1260 | foo 1261 | ``` 1262 | 1263 | `föö` 1264 | 1265 | föfö 1266 | 1267 | `foo` 1268 | 1269 | `` foo ` bar `` 1270 | 1271 | ` `` ` 1272 | 1273 | `` 1274 | foo 1275 | `` 1276 | 1277 | `foo bar 1278 | baz` 1279 | 1280 | `foo `` bar` 1281 | 1282 | `foo\`bar` 1283 | 1284 | *foo`*` 1285 | 1286 | [not a `link](/foo`) 1287 | 1288 | `` 1289 | 1290 | ` 1291 | 1292 | `` 1293 | 1294 | ` 1295 | 1296 | ```foo`` 1297 | 1298 | `foo 1299 | 1300 | *foo bar* 1301 | 1302 | a * foo bar* 1303 | 1304 | a*"foo"* 1305 | 1306 | * a * 1307 | 1308 | foo*bar* 1309 | 1310 | 5*6*78 1311 | 1312 | _foo bar_ 1313 | 1314 | _ foo bar_ 1315 | 1316 | a_"foo"_ 1317 | 1318 | foo_bar_ 1319 | 1320 | 5_6_78 1321 | 1322 | пристаням_стремятся_ 1323 | 1324 | aa_"bb"_cc 1325 | 1326 | foo-_(bar)_ 1327 | 1328 | _foo* 1329 | 1330 | *foo bar * 1331 | 1332 | *foo bar 1333 | * 1334 | 1335 | *(*foo) 1336 | 1337 | *(*foo*)* 1338 | 1339 | *foo*bar 1340 | 1341 | _foo bar _ 1342 | 1343 | _(_foo) 1344 | 1345 | _(_foo_)_ 1346 | 1347 | _foo_bar 1348 | 1349 | _пристаням_стремятся 1350 | 1351 | _foo_bar_baz_ 1352 | 1353 | _(bar)_. 
1354 | 1355 | **foo bar** 1356 | 1357 | ** foo bar** 1358 | 1359 | a**"foo"** 1360 | 1361 | foo**bar** 1362 | 1363 | __foo bar__ 1364 | 1365 | __ foo bar__ 1366 | 1367 | __ 1368 | foo bar__ 1369 | 1370 | a__"foo"__ 1371 | 1372 | foo__bar__ 1373 | 1374 | 5__6__78 1375 | 1376 | пристаням__стремятся__ 1377 | 1378 | __foo, __bar__, baz__ 1379 | 1380 | foo-__(bar)__ 1381 | 1382 | **foo bar ** 1383 | 1384 | **(**foo) 1385 | 1386 | *(**foo**)* 1387 | 1388 | **Gomphocarpus (*Gomphocarpus physocarpus*, syn. 1389 | *Asclepias physocarpa*)** 1390 | 1391 | **foo "*bar*" foo** 1392 | 1393 | **foo**bar 1394 | 1395 | __foo bar __ 1396 | 1397 | __(__foo) 1398 | 1399 | _(__foo__)_ 1400 | 1401 | __foo__bar 1402 | 1403 | __пристаням__стремятся 1404 | 1405 | __foo__bar__baz__ 1406 | 1407 | __(bar)__. 1408 | 1409 | *foo [bar](/url)* 1410 | 1411 | *foo 1412 | bar* 1413 | 1414 | _foo __bar__ baz_ 1415 | 1416 | _foo _bar_ baz_ 1417 | 1418 | __foo_ bar_ 1419 | 1420 | *foo *bar** 1421 | 1422 | *foo **bar** baz* 1423 | 1424 | *foo**bar**baz* 1425 | 1426 | ***foo** bar* 1427 | 1428 | *foo **bar*** 1429 | 1430 | *foo**bar*** 1431 | 1432 | *foo **bar *baz* bim** bop* 1433 | 1434 | *foo [*bar*](/url)* 1435 | 1436 | ** is not an empty emphasis 1437 | 1438 | **** is not an empty strong emphasis 1439 | 1440 | **foo [bar](/url)** 1441 | 1442 | **foo 1443 | bar** 1444 | 1445 | __foo _bar_ baz__ 1446 | 1447 | __foo __bar__ baz__ 1448 | 1449 | ____foo__ bar__ 1450 | 1451 | **foo **bar**** 1452 | 1453 | **foo *bar* baz** 1454 | 1455 | **foo*bar*baz** 1456 | 1457 | ***foo* bar** 1458 | 1459 | **foo *bar*** 1460 | 1461 | **foo *bar **baz** 1462 | bim* bop** 1463 | 1464 | **foo [*bar*](/url)** 1465 | 1466 | __ is not an empty emphasis 1467 | 1468 | ____ is not an empty strong emphasis 1469 | 1470 | foo *** 1471 | 1472 | foo *\** 1473 | 1474 | foo *_* 1475 | 1476 | foo ***** 1477 | 1478 | foo **\*** 1479 | 1480 | foo **_** 1481 | 1482 | **foo* 1483 | 1484 | *foo** 1485 | 1486 | ***foo** 1487 | 1488 | 
****foo* 1489 | 1490 | **foo*** 1491 | 1492 | *foo**** 1493 | 1494 | foo ___ 1495 | 1496 | foo _\__ 1497 | 1498 | foo _*_ 1499 | 1500 | foo _____ 1501 | 1502 | foo __\___ 1503 | 1504 | foo __*__ 1505 | 1506 | __foo_ 1507 | 1508 | _foo__ 1509 | 1510 | ___foo__ 1511 | 1512 | ____foo_ 1513 | 1514 | __foo___ 1515 | 1516 | _foo____ 1517 | 1518 | **foo** 1519 | 1520 | *_foo_* 1521 | 1522 | __foo__ 1523 | 1524 | _*foo*_ 1525 | 1526 | ****foo**** 1527 | 1528 | ____foo____ 1529 | 1530 | ******foo****** 1531 | 1532 | ***foo*** 1533 | 1534 | _____foo_____ 1535 | 1536 | *foo _bar* baz_ 1537 | 1538 | **foo*bar** 1539 | 1540 | *foo __bar *baz bim__ bam* 1541 | 1542 | **foo **bar baz** 1543 | 1544 | *foo *bar baz* 1545 | 1546 | *[bar*](/url) 1547 | 1548 | _foo [bar_](/url) 1549 | 1550 | * 1551 | 1552 | ** 1553 | 1554 | __ 1555 | 1556 | *a `*`* 1557 | 1558 | _a `_`_ 1559 | 1560 | **a 1561 | 1562 | __a 1563 | 1564 | [link](/uri "title") 1565 | 1566 | [link](/uri) 1567 | 1568 | [link]() 1569 | 1570 | [link](<>) 1571 | 1572 | [link](/my uri) 1573 | 1574 | [link]() 1575 | 1576 | [link](foo 1577 | bar) 1578 | 1579 | [link]() 1581 | 1582 | [link]((foo)and(bar)) 1583 | 1584 | [link](foo(and(bar))) 1585 | 1586 | [link](foo(and\(bar\))) 1587 | 1588 | [link]() 1589 | 1590 | [link](foo\)\:) 1591 | 1592 | [link](#fragment) 1593 | 1594 | [link](http://example.com#fragment) 1595 | 1596 | [link](http://example.com?foo=bar&baz#fragment) 1597 | 1598 | [link](foo\bar) 1599 | 1600 | [link](foo%20bä) 1601 | 1602 | [link]("title") 1603 | 1604 | [link](/url "title") 1605 | [link](/url 'title') 1606 | [link](/url (title)) 1607 | 1608 | [link](/url "title \""") 1609 | 1610 | [link](/url "title "and" title") 1611 | 1612 | [link](/url 'title "and" title') 1613 | 1614 | [link]( /uri 1615 | "title" ) 1616 | 1617 | [link] (/uri) 1618 | 1619 | [link [foo [bar]]](/uri) 1620 | 1621 | [link] bar](/uri) 1622 | 1623 | [link [bar](/uri) 1624 | 1625 | [link \[bar](/uri) 1626 | 1627 | [link *foo **bar** `#`*](/uri) 
1628 | 1629 | [![moon](moon.jpg)](/uri) 1630 | 1631 | [foo [bar](/uri)](/uri) 1632 | 1633 | [foo *[bar [baz](/uri)](/uri)*](/uri) 1634 | 1635 | ![[[foo](uri1)](uri2)](uri3) 1636 | 1637 | *[foo*](/uri) 1638 | 1639 | [foo *bar](baz*) 1640 | 1641 | *foo [bar* baz] 1642 | 1643 | [foo 1644 | 1645 | [foo`](/uri)` 1646 | 1647 | [foo 1648 | 1649 | [foo][bar] 1650 | 1651 | [bar]: /url "title" 1652 | 1653 | [link [foo [bar]]][ref] 1654 | 1655 | [ref]: /uri 1656 | 1657 | [link \[bar][ref] 1658 | 1659 | [ref]: /uri 1660 | 1661 | [link *foo **bar** `#`*][ref] 1662 | 1663 | [ref]: /uri 1664 | 1665 | [![moon](moon.jpg)][ref] 1666 | 1667 | [ref]: /uri 1668 | 1669 | [foo [bar](/uri)][ref] 1670 | 1671 | [ref]: /uri 1672 | 1673 | [foo *bar [baz][ref]*][ref] 1674 | 1675 | [ref]: /uri 1676 | 1677 | *[foo*][ref] 1678 | 1679 | [ref]: /uri 1680 | 1681 | [foo *bar][ref] 1682 | 1683 | [ref]: /uri 1684 | 1685 | [foo 1686 | 1687 | [ref]: /uri 1688 | 1689 | [foo`][ref]` 1690 | 1691 | [ref]: /uri 1692 | 1693 | [foo 1694 | 1695 | [ref]: /uri 1696 | 1697 | [foo][BaR] 1698 | 1699 | [bar]: /url "title" 1700 | 1701 | [Толпой][Толпой] is a Russian word. 1702 | 1703 | [ТОЛПОЙ]: /url 1704 | 1705 | [Foo 1706 | bar]: /url 1707 | 1708 | [Baz][Foo bar] 1709 | 1710 | [foo] [bar] 1711 | 1712 | [bar]: /url "title" 1713 | 1714 | [foo] 1715 | [bar] 1716 | 1717 | [bar]: /url "title" 1718 | 1719 | [foo]: /url1 1720 | 1721 | [foo]: /url2 1722 | 1723 | [bar][foo] 1724 | 1725 | [bar][foo\!] 
1726 | 1727 | [foo!]: /url 1728 | 1729 | [foo][ref[] 1730 | 1731 | [ref[]: /uri 1732 | 1733 | [foo][ref[bar]] 1734 | 1735 | [ref[bar]]: /uri 1736 | 1737 | [[[foo]]] 1738 | 1739 | [[[foo]]]: /url 1740 | 1741 | [foo][ref\[] 1742 | 1743 | [ref\[]: /uri 1744 | 1745 | [] 1746 | 1747 | []: /uri 1748 | 1749 | [ 1750 | ] 1751 | 1752 | [ 1753 | ]: /uri 1754 | 1755 | [foo][] 1756 | 1757 | [foo]: /url "title" 1758 | 1759 | [*foo* bar][] 1760 | 1761 | [*foo* bar]: /url "title" 1762 | 1763 | [Foo][] 1764 | 1765 | [foo]: /url "title" 1766 | 1767 | [foo] 1768 | [] 1769 | 1770 | [foo]: /url "title" 1771 | 1772 | [foo] 1773 | 1774 | [foo]: /url "title" 1775 | 1776 | [*foo* bar] 1777 | 1778 | [*foo* bar]: /url "title" 1779 | 1780 | [[*foo* bar]] 1781 | 1782 | [*foo* bar]: /url "title" 1783 | 1784 | [[bar [foo] 1785 | 1786 | [foo]: /url 1787 | 1788 | [Foo] 1789 | 1790 | [foo]: /url "title" 1791 | 1792 | [foo] bar 1793 | 1794 | [foo]: /url 1795 | 1796 | \[foo] 1797 | 1798 | [foo]: /url "title" 1799 | 1800 | [foo*]: /url 1801 | 1802 | *[foo*] 1803 | 1804 | [foo][bar] 1805 | 1806 | [foo]: /url1 1807 | [bar]: /url2 1808 | 1809 | [foo][bar][baz] 1810 | 1811 | [baz]: /url 1812 | 1813 | [foo][bar][baz] 1814 | 1815 | [baz]: /url1 1816 | [bar]: /url2 1817 | 1818 | [foo][bar][baz] 1819 | 1820 | [baz]: /url1 1821 | [foo]: /url2 1822 | 1823 | ![foo](/url "title") 1824 | 1825 | ![foo *bar*] 1826 | 1827 | [foo *bar*]: train.jpg "train & tracks" 1828 | 1829 | ![foo ![bar](/url)](/url2) 1830 | 1831 | ![foo [bar](/url)](/url2) 1832 | 1833 | ![foo *bar*][] 1834 | 1835 | [foo *bar*]: train.jpg "train & tracks" 1836 | 1837 | ![foo *bar*][foobar] 1838 | 1839 | [FOOBAR]: train.jpg "train & tracks" 1840 | 1841 | ![foo](train.jpg) 1842 | 1843 | My ![foo bar](/path/to/train.jpg "title" ) 1844 | 1845 | ![foo]() 1846 | 1847 | ![](/url) 1848 | 1849 | ![foo] [bar] 1850 | 1851 | [bar]: /url 1852 | 1853 | ![foo] [bar] 1854 | 1855 | [BAR]: /url 1856 | 1857 | ![foo][] 1858 | 1859 | [foo]: /url "title" 1860 | 1861 | 
![*foo* bar][] 1862 | 1863 | [*foo* bar]: /url "title" 1864 | 1865 | ![Foo][] 1866 | 1867 | [foo]: /url "title" 1868 | 1869 | ![foo] 1870 | [] 1871 | 1872 | [foo]: /url "title" 1873 | 1874 | ![foo] 1875 | 1876 | [foo]: /url "title" 1877 | 1878 | ![*foo* bar] 1879 | 1880 | [*foo* bar]: /url "title" 1881 | 1882 | ![[foo]] 1883 | 1884 | [[foo]]: /url "title" 1885 | 1886 | ![Foo] 1887 | 1888 | [foo]: /url "title" 1889 | 1890 | \!\[foo] 1891 | 1892 | [foo]: /url "title" 1893 | 1894 | \![foo] 1895 | 1896 | [foo]: /url "title" 1897 | 1898 | 1899 | 1900 | 1901 | 1902 | 1903 | 1904 | 1905 | 1906 | 1907 | 1908 | 1909 | 1910 | 1911 | 1912 | 1913 | 1914 | 1915 | 1916 | <> 1917 | 1918 | 1919 | 1920 | < http://foo.bar > 1921 | 1922 | 1923 | 1924 | 1925 | 1926 | http://example.com 1927 | 1928 | foo@bar.example.com 1929 | 1930 | 1931 | 1932 | 1933 | 1934 | 1936 | 1937 | 1939 | 1940 | 1941 | 1942 | 1943 | foo 1944 | 1945 | 1946 | <33> <__> 1947 | 1948 | 1949 | 1950 | 1961 | 1962 | foo 1964 | 1965 | foo 1966 | 1967 | foo foo --> 1968 | 1969 | foo 1970 | 1971 | foo 1972 | 1973 | foo 1974 | 1975 | foo &<]]> 1976 | 1977 | 1978 | 1979 | 1980 | 1981 | 1982 | 1983 | foo 1984 | baz 1985 | 1986 | foo\ 1987 | baz 1988 | 1989 | foo 1990 | baz 1991 | 1992 | foo 1993 | bar 1994 | 1995 | foo\ 1996 | bar 1997 | 1998 | *foo 1999 | bar* 2000 | 2001 | *foo\ 2002 | bar* 2003 | 2004 | `code 2005 | span` 2006 | 2007 | `code\ 2008 | span` 2009 | 2010 | 2012 | 2013 | 2015 | 2016 | foo\ 2017 | 2018 | foo 2019 | 2020 | ### foo\ 2021 | 2022 | ### foo 2023 | 2024 | foo 2025 | baz 2026 | 2027 | foo 2028 | baz 2029 | 2030 | hello $.;'there 2031 | 2032 | Foo χρῆν 2033 | 2034 | Multiple spaces 2035 | 2036 | -------------------------------------------------------------------------------- /src/main.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "pdf.h" 10 | 11 | 
#if defined(_WIN32) && !defined(__CYGWIN__) 12 | #include 13 | #include 14 | #endif 15 | 16 | void print_usage() 17 | { 18 | printf("Usage: cmarkpdf [FILE*]\n"); 19 | printf("Options:\n"); 20 | printf(" --output, -o FILE Output filename\n"); 21 | printf(" --sourcepos Include source position attribute\n"); 22 | printf(" --hardbreaks Treat newlines as hard line breaks\n"); 23 | printf(" --smart Use smart punctuation\n"); 24 | printf(" --help, -h Print usage information\n"); 25 | printf(" --version Print version\n"); 26 | } 27 | 28 | int main(int argc, char *argv[]) 29 | { 30 | int i, numfps = 0; 31 | int *files; 32 | int ok; 33 | char buffer[4096]; 34 | cmark_parser *parser; 35 | size_t bytes; 36 | cmark_node *document; 37 | char *outfile = NULL; 38 | int options = CMARK_OPT_DEFAULT | CMARK_OPT_SAFE | CMARK_OPT_NORMALIZE; 39 | 40 | #if defined(_WIN32) && !defined(__CYGWIN__) 41 | _setmode(_fileno(stdout), _O_BINARY); 42 | #endif 43 | 44 | files = (int *)malloc(argc * sizeof(*files)); 45 | 46 | for (i = 1; i < argc; i++) { 47 | if (strcmp(argv[i], "--version") == 0) { 48 | printf("cmark %s", CMARK_VERSION_STRING); 49 | printf(" - CommonMark converter\n(C) 2014, 2015 John MacFarlane\n"); 50 | exit(0); 51 | } else if (strcmp(argv[i], "--sourcepos") == 0) { 52 | options |= CMARK_OPT_SOURCEPOS; 53 | } else if (strcmp(argv[i], "--hardbreaks") == 0) { 54 | options |= CMARK_OPT_HARDBREAKS; 55 | } else if (strcmp(argv[i], "--smart") == 0) { 56 | options |= CMARK_OPT_SMART; 57 | } else if (strcmp(argv[i], "--validate-utf8") == 0) { 58 | options |= CMARK_OPT_VALIDATE_UTF8; 59 | } else if ((strcmp(argv[i], "--help") == 0) || 60 | (strcmp(argv[i], "-h") == 0)) { 61 | print_usage(); 62 | exit(0); 63 | } else if ((strcmp(argv[i], "-o") == 0) || 64 | (strcmp(argv[i], "--output") == 0)) { 65 | i += 1; 66 | if (i < argc) { 67 | outfile = argv[i]; 68 | } else { 69 | fprintf(stderr, "No argument provided for %s\n", 70 | argv[i - 1]); 71 | exit(1); 72 | } 73 | } else if (*argv[i] == '-') 
{ 74 | print_usage(); 75 | exit(1); 76 | } else { // treat as file argument 77 | files[numfps++] = i; 78 | } 79 | } 80 | 81 | if (!outfile) { 82 | fprintf(stderr, "Specify an output file with -o/--output\n"); 83 | exit(1); 84 | } 85 | 86 | parser = cmark_parser_new(options); 87 | for (i = 0; i < numfps; i++) { 88 | FILE *fp = fopen(argv[files[i]], "r"); 89 | if (fp == NULL) { 90 | fprintf(stderr, "Error opening file %s: %s\n", 91 | argv[files[i]], strerror(errno)); 92 | exit(1); 93 | } 94 | 95 | while ((bytes = fread(buffer, 1, sizeof(buffer), fp)) > 0) { 96 | cmark_parser_feed(parser, buffer, bytes); 97 | if (bytes < sizeof(buffer)) { 98 | break; 99 | } 100 | } 101 | 102 | fclose(fp); 103 | } 104 | 105 | if (numfps == 0) { 106 | 107 | while ((bytes = fread(buffer, 1, sizeof(buffer), stdin)) > 0) { 108 | cmark_parser_feed(parser, buffer, bytes); 109 | if (bytes < sizeof(buffer)) { 110 | break; 111 | } 112 | } 113 | } 114 | 115 | document = cmark_parser_finish(parser); 116 | cmark_parser_free(parser); 117 | 118 | ok = cmark_render_pdf(document, options, outfile); 119 | 120 | free(files); 121 | cmark_node_free(document); 122 | 123 | return ok ? 
0 : 1; 124 | } 125 | -------------------------------------------------------------------------------- /src/pdf.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "hpdf.h" 10 | #include "pdf.h" 11 | 12 | #if defined _LINUX 13 | #define FONT_PATH "/usr/share/fonts/truetype/dejavu/" 14 | #define MAIN_FONT "DejaVuSerif" 15 | #define MAIN_FONT_B MAIN_FONT "-Bold" 16 | #define MAIN_FONT_I MAIN_FONT "-Italic" 17 | #define MAIN_FONT_BI MAIN_FONT "-BoldItalic" 18 | #define TT_FONT "DejaVuSansMono" 19 | #define TT_FONT_B TT_FONT "-Bold" 20 | #define TT_FONT_I TT_FONT "-Oblique" 21 | #define TT_FONT_BI TT_FONT "-BoldOblique" 22 | 23 | #elif defined _OSX 24 | #define FONT_PATH "/Library/Fonts/" 25 | #define MAIN_FONT "Georgia" 26 | #define MAIN_FONT_B MAIN_FONT " Bold" 27 | #define MAIN_FONT_I MAIN_FONT " Italic" 28 | #define MAIN_FONT_BI MAIN_FONT " Bold Italic" 29 | #define TT_FONT "Andale Mono" 30 | #define TT_FONT_B TT_FONT 31 | #define TT_FONT_I TT_FONT 32 | #define TT_FONT_BI TT_FONT 33 | #endif 34 | 35 | #define MARGIN_TOP 100 36 | #define MARGIN_LEFT 100 37 | #define TEXT_WIDTH 380 38 | #define TEXT_HEIGHT 720 39 | 40 | #define STATUS_SKIP 2 41 | #define STATUS_OK 1 42 | #define STATUS_ERR 0 43 | 44 | #define MONOSPACE 1 45 | #define BOLD 2 46 | #define ITALIC 4 47 | 48 | #define errf(fmt, args) \ 49 | fprintf(stderr, "ERROR (%s:%d): ", __FILE__, __LINE__); \ 50 | fprintf(stderr, fmt, args); \ 51 | fprintf(stderr, "\n"); \ 52 | return STATUS_ERR; 53 | 54 | #define err(msg) errf("%s", msg); 55 | 56 | #ifdef HPDF_DLL 57 | void __stdcall 58 | #else 59 | void 60 | #endif 61 | error_handler (HPDF_STATUS error_no, 62 | HPDF_STATUS detail_no, 63 | void *user_data) 64 | { 65 | printf ("ERROR: error_no=%04X, detail_no=%u\n", 66 | (HPDF_UINT)error_no, (HPDF_UINT)detail_no); 67 | } 68 | 69 | enum box_type { 70 | TEXT, 71 | SPACE, 72 
| BREAK, 73 | IMAGE 74 | }; 75 | 76 | struct box { 77 | enum box_type type; 78 | const char * text; 79 | int len; 80 | float width; 81 | float height; 82 | struct box * next; 83 | int style; 84 | const char * link_dest; 85 | HPDF_Image image; 86 | }; 87 | 88 | typedef struct box box; 89 | 90 | /* 91 | // for diagnostics 92 | static void 93 | print_box(box * box) 94 | { 95 | switch (box->type) { 96 | case TEXT: 97 | printf("TEXT "); 98 | break; 99 | case IMAGE: 100 | printf("IMAGE "); 101 | break; 102 | case SPACE: 103 | printf("SPACE "); 104 | break; 105 | case BREAK: 106 | printf("BREAK "); 107 | break; 108 | default: 109 | break; 110 | } 111 | printf("%5.2f|%2x|%s|\n", box->width, box->style, box->text); 112 | }; 113 | */ 114 | 115 | struct render_state { 116 | HPDF_Doc pdf; 117 | const char* font_paths[8]; 118 | HPDF_Font fonts[8]; 119 | HPDF_REAL base_font_size; 120 | HPDF_REAL current_font_size; 121 | HPDF_REAL leading; 122 | HPDF_Page page; 123 | float indent; 124 | float x; 125 | float y; 126 | float last_text_y; 127 | box * boxes_bottom; 128 | box * boxes_top; 129 | int list_indent_level; 130 | int style; 131 | const char* link_dest; 132 | }; 133 | 134 | // lazily load font 135 | static int 136 | load_font(struct render_state *state, 137 | int style) 138 | { 139 | const char * fontname; 140 | const char * path; 141 | 142 | if (state->fonts[style]) { 143 | return STATUS_OK; 144 | } 145 | 146 | path = state->font_paths[style]; 147 | 148 | fontname = HPDF_LoadTTFontFromFile(state->pdf, 149 | path, 150 | HPDF_TRUE); 151 | if (!fontname) { 152 | errf("Could not load main font '%s'", path); 153 | } 154 | 155 | state->fonts[style] = HPDF_GetFont (state->pdf, fontname, "UTF-8"); 156 | if (!state->fonts[style]) { 157 | errf("Could not get font '%s'", fontname); 158 | } 159 | 160 | return STATUS_OK; 161 | } 162 | 163 | static int 164 | push_image_box(struct render_state *state, 165 | HPDF_Image image) 166 | { 167 | box * new = (box*)malloc(sizeof(box)); 168 | if (new 
== NULL) { 169 | err("Could not allocate box"); 170 | } 171 | new->type = IMAGE; 172 | new->style = 0; 173 | new->text = NULL; 174 | new->len = 0; 175 | new->link_dest = NULL; 176 | new->image = image; 177 | new->width = HPDF_Image_GetWidth(image); 178 | new->height = HPDF_Image_GetHeight(image); 179 | new->next = NULL; 180 | if (state->boxes_top != NULL) { 181 | state->boxes_top->next = new; 182 | } 183 | state->boxes_top = new; 184 | if (state->boxes_bottom == NULL) { 185 | state->boxes_bottom = new; 186 | } 187 | return STATUS_OK; 188 | } 189 | 190 | static int 191 | push_box(struct render_state *state, 192 | enum box_type type, 193 | const char * text, 194 | int style) 195 | { 196 | HPDF_TextWidth width; 197 | HPDF_Font font; 198 | 199 | if (load_font(state, style) == STATUS_ERR) { 200 | return STATUS_ERR; 201 | } 202 | font = state->fonts[style]; 203 | 204 | box * new = (box*)malloc(sizeof(box)); 205 | if (new == NULL) { 206 | err("Could not allocate box"); 207 | } 208 | new->style = style; 209 | new->type = type; 210 | new->text = text; 211 | new->len = text ? 
strlen(text) : 0; 212 | new->link_dest = state->link_dest; 213 | new->height = state->current_font_size + state->leading; 214 | 215 | if (new->type == SPACE) { 216 | width = HPDF_Font_TextWidth(font, (HPDF_BYTE*)"i", 1); 217 | if (!(style & MONOSPACE)) { 218 | width.width *= 0.67; 219 | } 220 | } else { 221 | width = HPDF_Font_TextWidth(font, (HPDF_BYTE*)text, new->len); 222 | } 223 | new->width = ( width.width * state->current_font_size ) / 1000; 224 | new->next = NULL; 225 | if (state->boxes_top != NULL) { 226 | state->boxes_top->next = new; 227 | } 228 | state->boxes_top = new; 229 | if (state->boxes_bottom == NULL) { 230 | state->boxes_bottom = new; 231 | } 232 | return STATUS_OK; 233 | } 234 | 235 | static int 236 | render_text(struct render_state *state, const char *text, bool wrap, int style) 237 | { 238 | char * tok; 239 | const char * next = text; 240 | const char * last_tok = text; 241 | int category = 0; 242 | int last_category = 0; 243 | int status; 244 | 245 | while (1) { 246 | switch (*next) { 247 | case ' ': 248 | category = 1; 249 | break; 250 | case '\n': 251 | category = 2; 252 | break; 253 | case 0: 254 | category = 0; 255 | break; 256 | default: 257 | category = 3; 258 | } 259 | if (category != last_category && next > last_tok) { 260 | // emit token from last_tok to next-1 261 | tok = (char *)malloc((next - last_tok) + 1); 262 | if (tok == NULL) { 263 | err("Could not allocate token"); 264 | } 265 | memcpy(tok, last_tok, next - last_tok); 266 | tok[next - last_tok] = 0; 267 | last_tok = next; 268 | status = push_box(state, tok[0] == ' ' ? 269 | SPACE : (tok[0] == '\n' ? 
270 | BREAK : TEXT), tok, 271 | style); 272 | if (status == STATUS_ERR) { 273 | return STATUS_ERR; 274 | } 275 | } 276 | if (*next == 0) 277 | break; 278 | last_category = category; 279 | next++; 280 | } 281 | return STATUS_OK; 282 | } 283 | 284 | // padding ensures that the specified space exists on the page 285 | static int 286 | add_page_if_needed(struct render_state *state, float padding) 287 | { 288 | if (!state->page || 289 | state->y - padding < 290 | HPDF_Page_GetHeight(state->page) - TEXT_HEIGHT) { 291 | /* add a new page object. */ 292 | state->page = HPDF_AddPage (state->pdf); 293 | if (!state->page) { 294 | err("Could not add page"); 295 | } 296 | state->y = HPDF_Page_GetHeight(state->page) - MARGIN_TOP; 297 | state->x = MARGIN_LEFT + state->indent; 298 | state->last_text_y = state->y; 299 | HPDF_Page_SetFontAndSize (state->page, 300 | state->fonts[0], 301 | state->current_font_size); 302 | } 303 | return STATUS_OK; 304 | } 305 | 306 | static int 307 | render_box(struct render_state *state, box * b) 308 | { 309 | int status; 310 | HPDF_Font font; 311 | HPDF_Rect rect = {state->x, state->y, state->x + b->width, 312 | state->y + state->current_font_size}; 313 | 314 | status = add_page_if_needed(state, 0); 315 | if (status == STATUS_ERR) { 316 | return status; 317 | } 318 | 319 | if (b->type == IMAGE) { 320 | if (HPDF_Page_DrawImage(state->page, b->image, 321 | state->x, 322 | state->y, 323 | // state->y + state->current_font_size + state->leading - b->height, 324 | b->width, 325 | b->height 326 | ) != HPDF_OK) { 327 | err("Could not draw image"); 328 | } 329 | state->x += b->width; 330 | return STATUS_OK; 331 | } 332 | 333 | // lazily load fonts as needed 334 | if (load_font(state, b->style) == STATUS_ERR) { 335 | return STATUS_ERR; 336 | } 337 | font = state->fonts[b->style]; 338 | 339 | if (b->link_dest != NULL && b->link_dest[0] != 0) { 340 | if (HPDF_Page_CreateURILinkAnnot (state->page, rect, 341 | b->link_dest) == NULL) { 342 | errf("Could not 
create link to '%s'", b->link_dest); 343 | } 344 | } 345 | 346 | HPDF_Page_SetFontAndSize (state->page, font, state->current_font_size); 347 | if (b->type == SPACE) { 348 | state->x += b->width; 349 | } else { 350 | HPDF_Page_BeginText (state->page); 351 | if (b->link_dest != NULL) { 352 | HPDF_Page_SetCMYKFill(state->page, 1, 0.5, 0, 0.5); 353 | } 354 | HPDF_Page_MoveTextPos(state->page, state->x, state->y); 355 | HPDF_Page_ShowText(state->page, b->text); 356 | if (b->link_dest != NULL) { 357 | HPDF_Page_SetCMYKFill(state->page, 0, 0, 0, 1); 358 | } 359 | HPDF_Page_EndText (state->page); 360 | state->x += b->width; 361 | } 362 | return STATUS_OK; 363 | } 364 | 365 | static int 366 | process_boxes(struct render_state *state, bool wrap) 367 | { 368 | box *b; 369 | box *tmp; 370 | box *last_nonspace; 371 | box *stop; 372 | float total_width = 0; 373 | float extra_space_width; 374 | float line_end_space; 375 | float max_width = TEXT_WIDTH - state->indent; 376 | int numspaces; 377 | int numspaces_to_last_nonspace; 378 | float max_height = 0; 379 | 380 | while (state->boxes_bottom) { 381 | 382 | numspaces = 0; 383 | b = state->boxes_bottom; 384 | last_nonspace = b; 385 | // move forward to last box that can fit in line 386 | while (b && 387 | b->type != BREAK && 388 | (!wrap || total_width + b->width <= max_width)) { 389 | total_width += b->width; 390 | if (b->type == SPACE) { 391 | numspaces++; 392 | } else { 393 | last_nonspace = b; 394 | numspaces_to_last_nonspace = numspaces; 395 | } 396 | b = b->next; 397 | } 398 | 399 | // recalculate space widths, unless last line of para or 400 | // line ends with hard break. 
401 | if (b && b->type != BREAK && wrap) { 402 | line_end_space = max_width - total_width; 403 | extra_space_width = (line_end_space / numspaces_to_last_nonspace); 404 | } else { // last line 405 | extra_space_width = state->current_font_size / 10; 406 | } 407 | 408 | tmp = state->boxes_bottom; 409 | if (wrap) { 410 | while (tmp && tmp != last_nonspace) { 411 | if (tmp->type == SPACE) { 412 | tmp->width += extra_space_width; 413 | } 414 | tmp = tmp->next; 415 | } 416 | } 417 | 418 | // emit line up to last_nonspace; 419 | 420 | // remove and free everything up to last_nonspace, 421 | // plus any following spaces. reset boxes_bottom. 422 | total_width = 0; 423 | stop = last_nonspace->next; 424 | while (state->boxes_bottom && 425 | (state->boxes_bottom != stop)) { 426 | if (max_height < state->boxes_bottom->height) { 427 | max_height = state->boxes_bottom->height; 428 | } 429 | if (render_box(state, state->boxes_bottom) == STATUS_ERR) { 430 | return STATUS_ERR; 431 | } 432 | tmp = state->boxes_bottom; 433 | state->boxes_bottom = state->boxes_bottom->next; 434 | if (tmp->text) { 435 | free((char*)tmp->text); 436 | } 437 | free(tmp); 438 | } 439 | //gobble spaces 440 | while (state->boxes_bottom && state->boxes_bottom->type == SPACE) { 441 | tmp = state->boxes_bottom; 442 | state->boxes_bottom = state->boxes_bottom->next; 443 | if (tmp->text) { 444 | free((char*)tmp->text); 445 | } 446 | free(tmp); 447 | } 448 | //gobble at most one BREAK 449 | if (state->boxes_bottom && state->boxes_bottom->type == BREAK) { 450 | tmp = state->boxes_bottom; 451 | state->boxes_bottom = state->boxes_bottom->next; 452 | if (tmp->text) { 453 | free((char*)tmp->text); 454 | } 455 | free(tmp); 456 | } 457 | 458 | state->last_text_y = state->y; 459 | state->x = MARGIN_LEFT + state->indent; 460 | state->y -= max_height; 461 | 462 | } 463 | state->boxes_top = NULL; 464 | state->boxes_bottom = NULL; 465 | return STATUS_OK; 466 | } 467 | 468 | static int 469 | parbreak(struct render_state *state, 
float padding) 470 | { 471 | int status; 472 | 473 | status = add_page_if_needed(state, padding); 474 | if (status == STATUS_ERR) { 475 | return status; 476 | } 477 | state->y = state->last_text_y - 478 | (1.5 * (state->current_font_size + state->leading)); 479 | state->x = MARGIN_LEFT + state->indent; 480 | 481 | return STATUS_OK; 482 | } 483 | 484 | static int 485 | S_render_node(cmark_node *node, cmark_event_type ev_type, 486 | struct render_state *state, int options) 487 | { 488 | int status; 489 | int entering = ev_type == CMARK_EVENT_ENTER; 490 | float real_width; 491 | char * bullets[] = {"\xE2\x97\xA6", 492 | "\xE2\x80\xA2"}; 493 | char marker[20]; 494 | size_t len; 495 | cmark_node * parent; 496 | int itemnumber; 497 | cmark_node * tmp; 498 | HPDF_Image image; 499 | const char * image_path; 500 | 501 | switch (cmark_node_get_type(node)) { 502 | case CMARK_NODE_DOCUMENT: 503 | break; 504 | 505 | case CMARK_NODE_ITEM: 506 | parent = cmark_node_parent(node); 507 | real_width = state->current_font_size * 508 | (cmark_node_get_list_type(parent) == CMARK_BULLET_LIST ? 
509 | 1.5 : 3); 510 | if (entering) { 511 | if (cmark_node_get_list_type(parent) == CMARK_BULLET_LIST) { 512 | parbreak(state, 0); 513 | len = strlen(bullets[state->list_indent_level % 2]); 514 | memcpy(marker, bullets[state->list_indent_level % 2], len); 515 | marker[len] = 0; 516 | } else { 517 | itemnumber = cmark_node_get_list_start(parent); 518 | tmp = node; 519 | while (cmark_node_previous(tmp)) { 520 | itemnumber++; 521 | tmp = cmark_node_previous(tmp); 522 | } 523 | sprintf(marker, "%4d.", itemnumber); 524 | len = strlen(marker); 525 | } 526 | parbreak(state, 0); 527 | HPDF_Page_SetFontAndSize (state->page, 528 | state->fonts[0], 529 | state->current_font_size); 530 | HPDF_Page_BeginText (state->page); 531 | HPDF_Page_MoveTextPos(state->page, state->x, state->y); 532 | HPDF_Page_ShowText(state->page, marker); 533 | HPDF_Page_EndText (state->page); 534 | state->x += real_width; 535 | state->indent += real_width; 536 | } else { 537 | state->indent -= real_width; 538 | } 539 | break; 540 | 541 | case CMARK_NODE_LIST: 542 | if (entering) { 543 | state->list_indent_level++; 544 | } else { 545 | state->list_indent_level--; 546 | } 547 | break; 548 | 549 | case CMARK_NODE_HRULE: 550 | parbreak(state, 0); 551 | HPDF_Page_MoveTo(state->page, state->x, state->y + state->leading); 552 | HPDF_Page_LineTo(state->page, state->x + (TEXT_WIDTH - (state->x - MARGIN_LEFT)), state->y + state->leading); 553 | HPDF_Page_Stroke(state->page); 554 | state->last_text_y = state->y; 555 | state->y -= (state->current_font_size + state->leading); 556 | state->x = MARGIN_LEFT + state->indent; 557 | break; 558 | 559 | case CMARK_NODE_BLOCK_QUOTE: 560 | if (entering) { 561 | state->indent += state->base_font_size * 2; 562 | state->current_font_size = state->base_font_size - 2; 563 | } else { 564 | state->current_font_size = state->base_font_size; 565 | state->indent -= state->base_font_size * 2; 566 | } 567 | break; 568 | 569 | case CMARK_NODE_PARAGRAPH: 570 | if (entering) { 571 | if 
(parbreak(state, 0) == STATUS_ERR) { 572 | return STATUS_ERR; 573 | } 574 | } else { 575 | if (process_boxes(state, true) == STATUS_ERR) { 576 | return STATUS_ERR; 577 | } 578 | } 579 | break; 580 | 581 | case CMARK_NODE_CODE_BLOCK: 582 | parbreak(state, 0); 583 | status = render_text(state, cmark_node_get_literal(node), false, state->style | MONOSPACE); 584 | if (status == STATUS_ERR) { 585 | return STATUS_ERR; 586 | } 587 | status = process_boxes(state, false); 588 | if (status == STATUS_ERR) { 589 | return STATUS_ERR; 590 | } 591 | state->y -= (state->current_font_size + state->leading); 592 | return STATUS_OK; 593 | 594 | case CMARK_NODE_HEADER: 595 | if (entering) { 596 | int lev = cmark_node_get_header_level(node); 597 | state->current_font_size = state->base_font_size * (1.66 - (lev/6)); 598 | parbreak(state, 3 * state->current_font_size); 599 | } else { 600 | if (process_boxes(state, true) == STATUS_ERR) { 601 | return STATUS_ERR; 602 | } 603 | state->y -= (0.3 * (state->current_font_size + state->leading)); 604 | state->current_font_size = state->base_font_size; 605 | } 606 | break; 607 | 608 | case CMARK_NODE_CODE: 609 | return render_text(state, cmark_node_get_literal(node), true, state->style | MONOSPACE); 610 | 611 | case CMARK_NODE_SOFTBREAK: 612 | return push_box(state, SPACE, NULL, 0); 613 | 614 | case CMARK_NODE_LINEBREAK: 615 | return push_box(state, BREAK, NULL, 0); 616 | 617 | case CMARK_NODE_TEXT: 618 | return render_text(state, cmark_node_get_literal(node), true, state->style); 619 | 620 | case CMARK_NODE_LINK: 621 | if (entering) { 622 | state->link_dest = cmark_node_get_url(node); 623 | } else { 624 | state->link_dest = NULL; 625 | } 626 | break; 627 | 628 | case CMARK_NODE_IMAGE: 629 | if (entering) { 630 | image_path = cmark_node_get_url(node); 631 | image = HPDF_LoadPngImageFromFile(state->pdf, image_path); 632 | if (image == NULL) { 633 | fprintf(stderr, 634 | "Could not load PNG image '%s'\n", 635 | image_path); 636 | 
HPDF_ResetError(state->pdf); 637 | return STATUS_OK; 638 | } else if (push_image_box(state, image) == STATUS_ERR) { 639 | return STATUS_ERR; 640 | } 641 | return STATUS_SKIP; 642 | } 643 | break; 644 | 645 | case CMARK_NODE_EMPH: 646 | if (entering) { 647 | state->style |= ITALIC; 648 | } else { 649 | state->style &= ~ITALIC; 650 | } 651 | break; 652 | 653 | case CMARK_NODE_STRONG: 654 | if (entering) { 655 | state->style |= BOLD; 656 | } else { 657 | state->style &= ~BOLD; 658 | } 659 | break; 660 | 661 | default: 662 | break; 663 | } 664 | 665 | return STATUS_OK; 666 | } 667 | 668 | 669 | // Returns 1 on success, 0 on failure. 670 | int cmark_render_pdf(cmark_node *root, int options, char *outfile) 671 | { 672 | struct render_state state = { }; 673 | state.font_paths[0] = FONT_PATH MAIN_FONT ".ttf"; 674 | state.font_paths[BOLD] = FONT_PATH MAIN_FONT_B ".ttf"; 675 | state.font_paths[ITALIC] = FONT_PATH MAIN_FONT_I ".ttf"; 676 | state.font_paths[BOLD + ITALIC] = FONT_PATH MAIN_FONT_BI ".ttf"; 677 | state.font_paths[MONOSPACE] = FONT_PATH TT_FONT ".ttf"; 678 | state.font_paths[MONOSPACE + BOLD] = FONT_PATH TT_FONT_B ".ttf"; 679 | state.font_paths[MONOSPACE + ITALIC] = FONT_PATH TT_FONT_I ".ttf"; 680 | state.font_paths[MONOSPACE + BOLD + ITALIC] = FONT_PATH TT_FONT_BI ".ttf"; 681 | 682 | state.pdf = HPDF_New (error_handler, NULL); 683 | if (!state.pdf) { 684 | err("Cannot create PdfDoc object"); 685 | } 686 | 687 | if (HPDF_UseUTFEncodings(state.pdf) != HPDF_OK) { 688 | err("Cannot set UTF-8 encoding"); 689 | }; 690 | 691 | /* set compression mode */ 692 | HPDF_SetCompressionMode (state.pdf, HPDF_COMP_ALL); 693 | 694 | state.style = 0; 695 | state.base_font_size = 10; 696 | state.current_font_size = 10; 697 | state.leading = 4; 698 | state.indent = 0; 699 | state.boxes_bottom = NULL; 700 | state.boxes_top = NULL; 701 | state.list_indent_level = 0; 702 | state.link_dest = NULL; 703 | 704 | // load main font: others loaded lazily as needed 705 | if (load_font(&state, 
0) == STATUS_ERR) { 706 | return STATUS_ERR; 707 | } 708 | 709 | cmark_event_type ev_type; 710 | cmark_node *cur; 711 | cmark_iter *iter = cmark_iter_new(root); 712 | int status = STATUS_OK; 713 | 714 | while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { 715 | cur = cmark_iter_get_node(iter); 716 | status = S_render_node(cur, ev_type, &state, options); 717 | if (status == STATUS_ERR) { 718 | break; 719 | } 720 | if (status == STATUS_SKIP && 721 | cmark_node_last_child(cur)) { 722 | // skip processing children 723 | cmark_iter_reset(iter, cur, CMARK_EVENT_EXIT); 724 | status = STATUS_OK; 725 | } 726 | } 727 | 728 | cmark_iter_free(iter); 729 | 730 | if (status == STATUS_OK) { 731 | /* save the document to a file */ 732 | if (HPDF_SaveToFile (state.pdf, outfile) != HPDF_OK) { 733 | errf("Could not save PDF to file '%s'", outfile); 734 | status = STATUS_ERR; 735 | } 736 | } 737 | 738 | /* clean up */ 739 | HPDF_Free (state.pdf); 740 | 741 | return status; 742 | } 743 | -------------------------------------------------------------------------------- /src/pdf.h: -------------------------------------------------------------------------------- 1 | #ifndef CMARK_CMARK_PDF_H 2 | #define CMARK_CMARK_PDF_H 3 | 4 | #include 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | int cmark_render_pdf(cmark_node *root, int options, char *outfile); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | #endif 17 | --------------------------------------------------------------------------------