├── .editorconfig ├── .github ├── FUNDING.yml └── workflows │ ├── build.yml │ └── publish.yml ├── .gitignore ├── .idea ├── .name ├── codeStyles │ ├── Project.xml │ └── codeStyleConfig.xml ├── inspectionProfiles │ └── Project_Default.xml ├── modules.xml ├── node-html-markdown.iml └── vcs.xml ├── CHANGELOG.md ├── README.md ├── benchmark ├── LICENSE.md ├── README.md ├── _run.js ├── execute.js ├── files │ ├── 039c4b966d1f2a0c589ac0aad211fe65500ad1cb58c7f45b34251db7056803ec.html │ ├── 06ed0a833361190536a4f61888354e07dccaa501bd9a1c0f1c545533bde1650b.html │ ├── 078cdb456d1beb698aeed86e0f2161e442e9431c4580295f1ba4ece22741068c.html │ ├── 0a8c510c3691d8e68ccc749559680257a382fe792a3d4d8531fb285cd74c3492.html │ ├── 0e55dcdbeb54c88ee87942b9fef7ea5398fa9a1e83493d55844b479506a80fd8.html │ ├── 17ca85324662023ba21666b3ca5d5d37a92b2806bf7a88b906c28b90a635f82a.html │ ├── 19fe8f574b7420277862728929d83dd74d7aa9c742688ca4c386b75693547bd3.html │ ├── 1a2c2f9fe410c836bb94e85c85625dbe8174f6e57f0b0316644cefd30979f096.html │ ├── 1bbc7f62e80e44afd533e896c0168c3b18f1e934530d05cb1f579ad3347d135c.html │ ├── 1d43b4816bdba5825165dc21558d9eafb9f650c67ba048411b04dc77a745dc39.html │ ├── 1de0efed4d661163ff8414e8ca69f45a49efd7edca19dc896ca0983a4bf41485.html │ ├── 1e62a223bca12adda6410b1789072a2ad755566bd4a6bc17d10dc95a51d74d65.html │ ├── 20f1955819dc2b50d2d10788f73adc72bceb491a03ed608debb72a90bce65c50.html │ ├── 22c0f41ae560968de5e6b0ef9ecffffeae3f409aa73d9b82853f65535116f68f.html │ ├── 22c3886e7116464c04c2332c20a013a5837992e7bcdb1f6cacd7d475f9784273.html │ ├── 22c4be85802e9602a344fc2cc704093362b9193523c6e35cfb7dc086c8ef8648.html │ ├── 26c3b98f33bb6902f32535235fd7d32792df87779bdf1f86c3b21e15fbf3161d.html │ ├── 2dbf7cd4444617cc60f0e2d2c95b20a535979a32972f5005e2af577b37980e48.html │ ├── 2fd71e2969106342bab6862bb212ae16ba592b426dd4141da8a383b183aa3a37.html │ ├── 35f536ef8c8eba0616f2dc78e6653e1d7d68e3af927b09efad3dae7ce2080567.html │ ├── 3b27831099c75b36d5978864ec89575c675c963e949cda52147a044bbfa77559.html 
│ ├── 3f6413c32bffc73b64cb1a2adb237cd19ffc75494c9172755f1a961ba32e75dd.html │ ├── 40d4e50472a8f0d30d68613051be510ed098087679df7e0e564d6dd32152d679.html │ ├── 42b43887c6dd91353249924745e030eac3a6d818966d91c67b406431ff9bdf05.html │ ├── 44b21071ae6feede3c36d2ab032cd422eb0c6a0fdfe4da79531931ad93dd4940.html │ ├── 44f750fab67bb9f54f5b5cc90bc34d55cff06260a3e63245856a6e57fcda5906.html │ ├── 45b6063ac2016db7b2fb1f995f0b54ee054fb561022e169c8fdbe321dcf672db.html │ ├── 45efaba666da241d9d069b550890530b65f2a6b61a5e529e1d3664630d4897ee.html │ ├── 46ab324348ca339dba58238e193f794c3309e52c018a8156ef9aedfedf0572e7.html │ ├── 46ed10778ec7c1292e624e1a72a2a0899f8ab6d8d4db1aa57fa4418b8b7e0a5d.html │ ├── 4b8debc51d3d9598ad4552cc7a591d200a6c7d545fed2454916bedbb0f666086.html │ ├── 4bf8e536214f987f4a0bf6ca7d233619d30bde1e80a816c78d00358eb61e353c.html │ ├── 4e0e399d24fe145def4817facccb0ff79e305dedb9ece5f8ec66396ea378f723.html │ ├── 4f454cb97e9b77d94c10ed8a6a35cd2eff1671de9d3d27852a38abd76a95be83.html │ ├── 4f83531b9fc91fd1e0062e43200669cd82cc36a518caa7f66fc6ba5be4ac545b.html │ ├── 4fe5472ba89db38e20daef6025108310c52121fd382c06314d5b33d7f47c1e94.html │ ├── 5a012f66c2bf0c70a0744c7483478aaa0c1a2b5b5920a72223f3a090e39df8be.html │ ├── 5bc9df3a36efb57a22edf862cec6a28eb112e535559c194d7976fb664c922c13.html │ ├── 5c83c2d71f97e2b5a979f197fbae6773dee6844e28889ae66ccb8d7458a9c5bb.html │ ├── 5de3db78f95172797a51b3b3b2cdc4caeb63a4d7b709e4441510d2c1967e0e6f.html │ ├── 5f081a0a9d1a1ce3b0e53603ecd8bde78947841c8fd1ff3c36efa95ee84681f6.html │ ├── 5f8b89390d3fc01c6a80728ba2aee597fea1dbfc8399d61015956db71e5336c7.html │ ├── 5f8c9f60be2250f694094ee1ca5deb9df10479e29fc92ff07c77c4cb9d2c3f21.html │ ├── 5fbfe3905c71925b1b3a875a3111073e5d0996d3f250a697398477d3642db321.html │ ├── 60b8aff17382f2fd02584645ef66e517b41f764d5b4ca404c1ceff3fe22bdda8.html │ ├── 60bccec4069d54a6889bfcda785c0f3066a70cb5fadeea81f28d371681a2dee8.html │ ├── 60cc80fb25f0b2ebdb2e6835ab7bfd3d26362971e39fe8838e7ac548ba323cf0.html │ ├── 
61adb9c208d9c67253b4413ef7ec2d010edae448b8c832bff2254125e4b51d5f.html │ ├── 61d8052b19ed9885651ed1110ddcccc001f9ec2e3b7a77926d350762bcd02400.html │ ├── 63c6d5256b8ce1098b5688eb5fafa747e9467692d099a3e9e42246e7af29748f.html │ ├── 64bf40da8348d808ef103cc5529fd268fec46fbefa40b486d288d2a07871a527.html │ ├── 6a59bd96489c98226c72f0245bac98a4b09aa0516ebfe4982233a6c33d129691.html │ ├── 6b095375a53dfc7994a032e2efac70f43a4fac9303d549256d88b8f7cecadd50.html │ ├── 6b817bedb8d6402bab160ed6d2b99256163bd3aef20deae3015f74e5bb253e55.html │ ├── 6d30abed88489774017024b17cdb1928d9a2b45bb79767515383b8444e9601b2.html │ ├── 71bf3c23c5d3fff9cec67606fde6547c8866ae8aa95f5991651d94c68df4ad1d.html │ ├── 71cb773c42c94b75d41c059a27dd10b763443a71dbb6dd202402843de8a5e331.html │ ├── 72e78dee157bdf3e8a9a9f07e54a98a3714ea2998e2c2e2a94c46dbe92176feb.html │ ├── 72ecfb3f60f4e8a6103916f2041ce9a55c4ef1e31477f9a8ffb7f4d3bba8c559.html │ ├── 73c175cdf9d5e065351ecf2220510088904adb77b49211cdd99e43e5870e06c2.html │ ├── 74e8bc94abea7c60f022d8d3f672f80e59e3e126735fae0b5ee5914ff2fce48e.html │ ├── 7a426de207434e419a65eead0f4b46c8a479429d8429c36dc03b033d7e4891df.html │ ├── 7b7ffca82db8f721d6e5a8e4e65e60885af5eee4b9f28beb6b8363bb70c820f9.html │ ├── 7e26f2e426fef3c1a370382e7827ef2e530a2ff0c2cea7641ebb596a4a1b8008.html │ ├── 7e2d19ccbb3b4029dddf26557555278babdac18bb78a742052fd946001c28e4e.html │ ├── 7e54e701ac39a9046d6eeb0ae75d2138733b66b30b5211e7f3245dd6dc3ca36c.html │ ├── 7e91eb56692c91312a3dc3e7b769a2916029ef3d9e431d056d5f548c0f771d16.html │ ├── 7fc58a2d32d5b8d5fa9b918453a284acc71703ccfa0f0c89ec292b4245fd0521.html │ ├── 81d304541f62a6aaf29494766718ab8e58e95a8e784613e75f106cdef17868d6.html │ ├── 83c362b1373f55d45fdad0edee4d2885cafd0da3f2afb146cf2822448c3c4104.html │ ├── 84a7e7d5f61c90050a326bb74ac3a57899fdba4b755bd50df01a053c262d354e.html │ ├── 8a1eb64f950f2f43097577c244fb38a35660f50a88c4305b23a8f24f254da8cb.html │ ├── 8a701b6ec1c56e2c37357030da0b4b10af4187f069a988e12c2f91d2ba40cdc1.html │ ├── 
8a82ce22fec5e3656dad3d55e585727c88c94808ad92e37a0f6e99dcb3888800.html │ ├── 8a9d17a1e5b1866abc7b9263fabbc428e5299c7443ecad6cc56c0076287fe11a.html │ ├── 8bd6d9bcba689408767f770d69f12b59c3f092e73cffcc9332261fbab4aa16e1.html │ ├── 8c0dd0456453aeff3f66d053710f18adc1a2fc0f1f3a0c95a3e166e41ffb737d.html │ ├── 8c1a780dec8c1a5ea0344514524f53b2b580ce87083e0a756ade3d83627d5653.html │ ├── 8cbf3b144736ffc4adda5fe7105e7fd1413dcc1955110829d849a658aa722bea.html │ ├── 8cfa9d30e2b66b991461423012906121661cd9c8809f564eabb660149577864d.html │ ├── 8d612a03fa42a2fb014b59534c46c9590da90fbeb91ac50938cdfa36dd274e23.html │ ├── 8faa3156452fa9d0667617c406eb9b6458b48d7b8c36cf2bf804fba290b302f5.html │ ├── 9c947bc9fbcb4e2eb0296d858fe193f580e869db7869358af822d7d2d4c0388e.html │ ├── 9e04cb267a9b128369a11c7f6e5486d43644955dee7f73cc004b9cf1693a11c1.html │ ├── 9e3c6d40690c1302613f203db178b23f9f18494d2653a1b547086a3973fff93c.html │ ├── 9f2031ee45a11919452ca2efbc3498672324cda5f76314d7ea10913f63cf3545.html │ └── 9fba51a14308353194c537f494ded0ccb27d9f908f252690b083d48db64ea15a.html ├── index.js ├── package.json ├── wrapper │ ├── node-html-markdown.js │ ├── node-html-markdown_reuse.js │ ├── turndown.js │ └── turndown_reuse.js └── yarn.lock ├── jest.config.js ├── package.json ├── src ├── config.ts ├── index.ts ├── main.ts ├── nodes.ts ├── options.ts ├── translator.ts ├── utilities.ts └── visitor.ts ├── test ├── default-tags-codeblock.test.ts ├── default-tags.test.ts ├── options.test.ts ├── special-cases.test.ts ├── table.test.ts └── tsconfig.json ├── transformer.js ├── tsconfig.base.json ├── tsconfig.json └── yarn.lock /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | indent_style = space 5 | indent_size = 2 6 | charset = utf-8 7 | trim_trailing_whitespace = true 8 | insert_final_newline = true 9 | max_line_length = 120 10 | ij_visual_guides = 120 11 | ij_wrap_on_typing = false 12 | end_of_line = lf 13 | 14 | [*.md] 15 | 
trim_trailing_whitespace = false 16 | 17 | [.editorconfig] 18 | ij_editorconfig_align_group_field_declarations = false 19 | ij_editorconfig_space_after_colon = false 20 | ij_editorconfig_space_after_comma = true 21 | ij_editorconfig_space_before_colon = false 22 | ij_editorconfig_space_before_comma = false 23 | ij_editorconfig_spaces_around_assignment_operators = true 24 | 25 | 26 | [{*.js, *.cjs}] 27 | tab_width = 2 28 | ij_continuation_indent_size = 2 29 | ij_javascript_align_imports = false 30 | ij_javascript_align_multiline_array_initializer_expression = false 31 | ij_javascript_align_multiline_binary_operation = false 32 | ij_javascript_align_multiline_chained_methods = false 33 | ij_javascript_align_multiline_extends_list = false 34 | ij_javascript_align_multiline_for = true 35 | ij_javascript_align_multiline_parameters = true 36 | ij_javascript_align_multiline_parameters_in_calls = false 37 | ij_javascript_align_multiline_ternary_operation = true 38 | ij_javascript_align_object_properties = 0 39 | ij_javascript_align_union_types = false 40 | ij_javascript_align_var_statements = 0 41 | ij_javascript_array_initializer_new_line_after_left_brace = false 42 | ij_javascript_array_initializer_right_brace_on_new_line = false 43 | ij_javascript_array_initializer_wrap = off 44 | ij_javascript_assignment_wrap = off 45 | ij_javascript_binary_operation_sign_on_next_line = false 46 | ij_javascript_binary_operation_wrap = off 47 | ij_javascript_blacklist_imports = rxjs/Rx, node_modules/**/*, @angular/material, @angular/material/typings/** 48 | ij_javascript_blank_lines_after_imports = 2 49 | ij_javascript_blank_lines_around_class = 1 50 | ij_javascript_blank_lines_around_field = 0 51 | ij_javascript_blank_lines_around_function = 1 52 | ij_javascript_blank_lines_around_method = 0 53 | ij_javascript_block_brace_style = end_of_line 54 | ij_javascript_call_parameters_new_line_after_left_paren = false 55 | ij_javascript_call_parameters_right_paren_on_new_line = false 56 | 
ij_javascript_call_parameters_wrap = off 57 | ij_javascript_catch_on_new_line = true 58 | ij_javascript_chained_call_dot_on_new_line = true 59 | ij_javascript_class_brace_style = next_line_if_wrapped 60 | ij_javascript_comma_on_new_line = false 61 | ij_javascript_do_while_brace_force = never 62 | ij_javascript_else_on_new_line = false 63 | ij_javascript_enforce_trailing_comma = keep 64 | ij_javascript_extends_keyword_wrap = off 65 | ij_javascript_extends_list_wrap = off 66 | ij_javascript_field_prefix = _ 67 | ij_javascript_file_name_style = relaxed 68 | ij_javascript_finally_on_new_line = true 69 | ij_javascript_for_brace_force = never 70 | ij_javascript_for_statement_new_line_after_left_paren = false 71 | ij_javascript_for_statement_right_paren_on_new_line = false 72 | ij_javascript_for_statement_wrap = off 73 | ij_javascript_force_quote_style = false 74 | ij_javascript_force_semicolon_style = false 75 | ij_javascript_function_expression_brace_style = next_line_if_wrapped 76 | ij_javascript_if_brace_force = never 77 | ij_javascript_import_merge_members = global 78 | ij_javascript_import_prefer_absolute_path = global 79 | ij_javascript_import_sort_members = true 80 | ij_javascript_import_sort_module_name = false 81 | ij_javascript_import_use_node_resolution = true 82 | ij_javascript_imports_wrap = normal 83 | ij_javascript_indent_case_from_switch = true 84 | ij_javascript_indent_chained_calls = true 85 | ij_javascript_indent_package_children = 0 86 | ij_javascript_jsx_attribute_value = braces 87 | ij_javascript_keep_blank_lines_in_code = 2 88 | ij_javascript_keep_first_column_comment = true 89 | ij_javascript_keep_indents_on_empty_lines = false 90 | ij_javascript_keep_line_breaks = true 91 | ij_javascript_keep_simple_blocks_in_one_line = true 92 | ij_javascript_keep_simple_methods_in_one_line = true 93 | ij_javascript_line_comment_add_space = true 94 | ij_javascript_line_comment_at_first_column = false 95 | ij_javascript_method_brace_style = end_of_line 96 | 
ij_javascript_method_call_chain_wrap = off 97 | ij_javascript_method_parameters_new_line_after_left_paren = false 98 | ij_javascript_method_parameters_right_paren_on_new_line = false 99 | ij_javascript_method_parameters_wrap = off 100 | ij_javascript_object_literal_wrap = on_every_item 101 | ij_javascript_parentheses_expression_new_line_after_left_paren = false 102 | ij_javascript_parentheses_expression_right_paren_on_new_line = false 103 | ij_javascript_place_assignment_sign_on_next_line = false 104 | ij_javascript_prefer_as_type_cast = false 105 | ij_javascript_prefer_parameters_wrap = false 106 | ij_javascript_reformat_c_style_comments = false 107 | ij_javascript_space_after_colon = true 108 | ij_javascript_space_after_comma = true 109 | ij_javascript_space_after_dots_in_rest_parameter = false 110 | ij_javascript_space_after_generator_mult = true 111 | ij_javascript_space_after_property_colon = true 112 | ij_javascript_space_after_quest = true 113 | ij_javascript_space_after_type_colon = true 114 | ij_javascript_space_after_unary_not = false 115 | ij_javascript_space_before_async_arrow_lparen = true 116 | ij_javascript_space_before_catch_keyword = true 117 | ij_javascript_space_before_catch_left_brace = true 118 | ij_javascript_space_before_catch_parentheses = true 119 | ij_javascript_space_before_class_lbrace = true 120 | ij_javascript_space_before_class_left_brace = true 121 | ij_javascript_space_before_colon = true 122 | ij_javascript_space_before_comma = false 123 | ij_javascript_space_before_do_left_brace = true 124 | ij_javascript_space_before_else_keyword = true 125 | ij_javascript_space_before_else_left_brace = true 126 | ij_javascript_space_before_finally_keyword = true 127 | ij_javascript_space_before_finally_left_brace = true 128 | ij_javascript_space_before_for_left_brace = true 129 | ij_javascript_space_before_for_parentheses = true 130 | ij_javascript_space_before_for_semicolon = false 131 | ij_javascript_space_before_function_left_parenth = true 
132 | ij_javascript_space_before_generator_mult = false 133 | ij_javascript_space_before_if_left_brace = true 134 | ij_javascript_space_before_if_parentheses = true 135 | ij_javascript_space_before_method_call_parentheses = false 136 | ij_javascript_space_before_method_left_brace = true 137 | ij_javascript_space_before_method_parentheses = false 138 | ij_javascript_space_before_property_colon = false 139 | ij_javascript_space_before_quest = true 140 | ij_javascript_space_before_switch_left_brace = true 141 | ij_javascript_space_before_switch_parentheses = true 142 | ij_javascript_space_before_try_left_brace = true 143 | ij_javascript_space_before_type_colon = false 144 | ij_javascript_space_before_unary_not = false 145 | ij_javascript_space_before_while_keyword = true 146 | ij_javascript_space_before_while_left_brace = true 147 | ij_javascript_space_before_while_parentheses = true 148 | ij_javascript_spaces_around_additive_operators = true 149 | ij_javascript_spaces_around_arrow_function_operator = true 150 | ij_javascript_spaces_around_assignment_operators = true 151 | ij_javascript_spaces_around_bitwise_operators = true 152 | ij_javascript_spaces_around_equality_operators = true 153 | ij_javascript_spaces_around_logical_operators = true 154 | ij_javascript_spaces_around_multiplicative_operators = true 155 | ij_javascript_spaces_around_relational_operators = true 156 | ij_javascript_spaces_around_shift_operators = true 157 | ij_javascript_spaces_around_unary_operator = false 158 | ij_javascript_spaces_within_array_initializer_brackets = true 159 | ij_javascript_spaces_within_brackets = false 160 | ij_javascript_spaces_within_catch_parentheses = false 161 | ij_javascript_spaces_within_for_parentheses = false 162 | ij_javascript_spaces_within_if_parentheses = false 163 | ij_javascript_spaces_within_imports = true 164 | ij_javascript_spaces_within_interpolation_expressions = false 165 | ij_javascript_spaces_within_method_call_parentheses = false 166 | 
ij_javascript_spaces_within_method_parentheses = false 167 | ij_javascript_spaces_within_object_literal_braces = true 168 | ij_javascript_spaces_within_object_type_braces = true 169 | ij_javascript_spaces_within_parentheses = false 170 | ij_javascript_spaces_within_switch_parentheses = false 171 | ij_javascript_spaces_within_type_assertion = false 172 | ij_javascript_spaces_within_union_types = true 173 | ij_javascript_spaces_within_while_parentheses = false 174 | ij_javascript_special_else_if_treatment = true 175 | ij_javascript_ternary_operation_signs_on_next_line = false 176 | ij_javascript_ternary_operation_wrap = off 177 | ij_javascript_union_types_wrap = on_every_item 178 | ij_javascript_use_chained_calls_group_indents = false 179 | ij_javascript_use_double_quotes = true 180 | ij_javascript_use_explicit_js_extension = auto 181 | ij_javascript_use_path_mapping = always 182 | ij_javascript_use_public_modifier = false 183 | ij_javascript_use_semicolon_after_statement = true 184 | ij_javascript_var_declaration_wrap = normal 185 | ij_javascript_while_brace_force = never 186 | ij_javascript_while_on_new_line = false 187 | ij_javascript_wrap_comments = false 188 | 189 | 190 | [{*.zsh, *.bash, *.sh}] 191 | ij_shell_binary_ops_start_line = false 192 | ij_shell_keep_column_alignment_padding = false 193 | ij_shell_minify_program = false 194 | ij_shell_redirect_followed_by_space = false 195 | ij_shell_switch_cases_indented = false 196 | 197 | 198 | [{.babelrc, .prettierrc, .stylelintrc, .eslintrc, jest.config, *.json, *.jsb3, *.jsb2, *.bowerrc}] 199 | ij_json_keep_blank_lines_in_code = 2 200 | ij_json_keep_indents_on_empty_lines = false 201 | ij_json_keep_line_breaks = true 202 | ij_json_space_after_colon = true 203 | ij_json_space_after_comma = true 204 | ij_json_space_before_colon = true 205 | ij_json_space_before_comma = false 206 | ij_json_spaces_within_braces = true 207 | ij_json_spaces_within_brackets = true 208 | ij_json_wrap_long_lines = false 209 | 210 | 
[{*.ats, *.ts, *.tsx}] 211 | tab_width = 2 212 | ij_continuation_indent_size = 2 213 | ij_typescript_align_imports = false 214 | ij_typescript_align_multiline_array_initializer_expression = false 215 | ij_typescript_align_multiline_binary_operation = false 216 | ij_typescript_align_multiline_chained_methods = false 217 | ij_typescript_align_multiline_extends_list = false 218 | ij_typescript_align_multiline_for = true 219 | ij_typescript_align_multiline_parameters = true 220 | ij_typescript_align_multiline_parameters_in_calls = false 221 | ij_typescript_align_multiline_ternary_operation = true 222 | ij_typescript_align_object_properties = 0 223 | ij_typescript_align_union_types = false 224 | ij_typescript_align_var_statements = 0 225 | ij_typescript_array_initializer_new_line_after_left_brace = false 226 | ij_typescript_array_initializer_right_brace_on_new_line = false 227 | ij_typescript_array_initializer_wrap = off 228 | ij_typescript_assignment_wrap = off 229 | ij_typescript_binary_operation_sign_on_next_line = false 230 | ij_typescript_binary_operation_wrap = off 231 | ij_typescript_blacklist_imports = rxjs/Rx, node_modules/**/*, @angular/material, @angular/material/typings/** 232 | ij_typescript_blank_lines_after_imports = 2 233 | ij_typescript_blank_lines_around_class = 1 234 | ij_typescript_blank_lines_around_field = 0 235 | ij_typescript_blank_lines_around_field_in_interface = 0 236 | ij_typescript_blank_lines_around_function = 0 237 | ij_typescript_blank_lines_around_method = 0 238 | ij_typescript_blank_lines_around_method_in_interface = 0 239 | ij_typescript_block_brace_style = end_of_line 240 | ij_typescript_call_parameters_new_line_after_left_paren = false 241 | ij_typescript_call_parameters_right_paren_on_new_line = false 242 | ij_typescript_call_parameters_wrap = off 243 | ij_typescript_catch_on_new_line = true 244 | ij_typescript_chained_call_dot_on_new_line = true 245 | ij_typescript_class_brace_style = next_line_if_wrapped 246 | 
ij_typescript_comma_on_new_line = false 247 | ij_typescript_do_while_brace_force = never 248 | ij_typescript_else_on_new_line = false 249 | ij_typescript_enforce_trailing_comma = keep 250 | ij_typescript_extends_keyword_wrap = off 251 | ij_typescript_extends_list_wrap = off 252 | ij_typescript_field_prefix = _ 253 | ij_typescript_file_name_style = relaxed 254 | ij_typescript_finally_on_new_line = true 255 | ij_typescript_for_brace_force = never 256 | ij_typescript_for_statement_new_line_after_left_paren = false 257 | ij_typescript_for_statement_right_paren_on_new_line = false 258 | ij_typescript_for_statement_wrap = off 259 | ij_typescript_force_quote_style = true 260 | ij_typescript_force_semicolon_style = false 261 | ij_typescript_function_expression_brace_style = next_line_if_wrapped 262 | ij_typescript_if_brace_force = never 263 | ij_typescript_import_merge_members = global 264 | ij_typescript_import_prefer_absolute_path = global 265 | ij_typescript_import_sort_members = true 266 | ij_typescript_import_sort_module_name = false 267 | ij_typescript_import_use_node_resolution = true 268 | ij_typescript_imports_wrap = normal 269 | ij_typescript_indent_case_from_switch = true 270 | ij_typescript_indent_chained_calls = true 271 | ij_typescript_indent_package_children = 0 272 | ij_typescript_jsdoc_include_types = false 273 | ij_typescript_jsx_attribute_value = braces 274 | ij_typescript_keep_blank_lines_in_code = 2 275 | ij_typescript_keep_first_column_comment = true 276 | ij_typescript_keep_indents_on_empty_lines = false 277 | ij_typescript_keep_line_breaks = true 278 | ij_typescript_keep_simple_blocks_in_one_line = true 279 | ij_typescript_keep_simple_methods_in_one_line = true 280 | ij_typescript_line_comment_add_space = true 281 | ij_typescript_line_comment_at_first_column = false 282 | ij_typescript_method_brace_style = next_line_if_wrapped 283 | ij_typescript_method_call_chain_wrap = off 284 | ij_typescript_method_parameters_new_line_after_left_paren = false 285 
| ij_typescript_method_parameters_right_paren_on_new_line = false 286 | ij_typescript_method_parameters_wrap = off 287 | ij_typescript_object_literal_wrap = on_every_item 288 | ij_typescript_parentheses_expression_new_line_after_left_paren = false 289 | ij_typescript_parentheses_expression_right_paren_on_new_line = false 290 | ij_typescript_place_assignment_sign_on_next_line = false 291 | ij_typescript_prefer_as_type_cast = false 292 | ij_typescript_prefer_parameters_wrap = false 293 | ij_typescript_reformat_c_style_comments = false 294 | ij_typescript_space_after_colon = true 295 | ij_typescript_space_after_comma = true 296 | ij_typescript_space_after_dots_in_rest_parameter = false 297 | ij_typescript_space_after_generator_mult = true 298 | ij_typescript_space_after_property_colon = true 299 | ij_typescript_space_after_quest = true 300 | ij_typescript_space_after_type_colon = true 301 | ij_typescript_space_after_unary_not = false 302 | ij_typescript_space_before_async_arrow_lparen = true 303 | ij_typescript_space_before_catch_keyword = true 304 | ij_typescript_space_before_catch_left_brace = true 305 | ij_typescript_space_before_catch_parentheses = true 306 | ij_typescript_space_before_class_lbrace = true 307 | ij_typescript_space_before_class_left_brace = true 308 | ij_typescript_space_before_colon = true 309 | ij_typescript_space_before_comma = false 310 | ij_typescript_space_before_do_left_brace = true 311 | ij_typescript_space_before_else_keyword = true 312 | ij_typescript_space_before_else_left_brace = true 313 | ij_typescript_space_before_finally_keyword = true 314 | ij_typescript_space_before_finally_left_brace = true 315 | ij_typescript_space_before_for_left_brace = true 316 | ij_typescript_space_before_for_parentheses = true 317 | ij_typescript_space_before_for_semicolon = false 318 | ij_typescript_space_before_function_left_parenth = true 319 | ij_typescript_space_before_generator_mult = false 320 | ij_typescript_space_before_if_left_brace = true 321 | 
ij_typescript_space_before_if_parentheses = true 322 | ij_typescript_space_before_method_call_parentheses = false 323 | ij_typescript_space_before_method_left_brace = true 324 | ij_typescript_space_before_method_parentheses = false 325 | ij_typescript_space_before_property_colon = false 326 | ij_typescript_space_before_quest = true 327 | ij_typescript_space_before_switch_left_brace = true 328 | ij_typescript_space_before_switch_parentheses = true 329 | ij_typescript_space_before_try_left_brace = true 330 | ij_typescript_space_before_type_colon = false 331 | ij_typescript_space_before_unary_not = false 332 | ij_typescript_space_before_while_keyword = true 333 | ij_typescript_space_before_while_left_brace = true 334 | ij_typescript_space_before_while_parentheses = true 335 | ij_typescript_spaces_around_additive_operators = true 336 | ij_typescript_spaces_around_arrow_function_operator = true 337 | ij_typescript_spaces_around_assignment_operators = true 338 | ij_typescript_spaces_around_bitwise_operators = true 339 | ij_typescript_spaces_around_equality_operators = true 340 | ij_typescript_spaces_around_logical_operators = true 341 | ij_typescript_spaces_around_multiplicative_operators = true 342 | ij_typescript_spaces_around_relational_operators = true 343 | ij_typescript_spaces_around_shift_operators = true 344 | ij_typescript_spaces_around_unary_operator = false 345 | ij_typescript_spaces_within_array_initializer_brackets = true 346 | ij_typescript_spaces_within_brackets = false 347 | ij_typescript_spaces_within_catch_parentheses = false 348 | ij_typescript_spaces_within_for_parentheses = false 349 | ij_typescript_spaces_within_if_parentheses = false 350 | ij_typescript_spaces_within_imports = true 351 | ij_typescript_spaces_within_interpolation_expressions = false 352 | ij_typescript_spaces_within_method_call_parentheses = false 353 | ij_typescript_spaces_within_method_parentheses = false 354 | ij_typescript_spaces_within_object_literal_braces = true 355 | 
ij_typescript_spaces_within_object_type_braces = true 356 | ij_typescript_spaces_within_parentheses = false 357 | ij_typescript_spaces_within_switch_parentheses = false 358 | ij_typescript_spaces_within_type_assertion = false 359 | ij_typescript_spaces_within_union_types = true 360 | ij_typescript_spaces_within_while_parentheses = false 361 | ij_typescript_special_else_if_treatment = true 362 | ij_typescript_ternary_operation_signs_on_next_line = false 363 | ij_typescript_ternary_operation_wrap = off 364 | ij_typescript_union_types_wrap = on_every_item 365 | ij_typescript_use_chained_calls_group_indents = false 366 | ij_typescript_use_double_quotes = false 367 | ij_typescript_use_explicit_js_extension = auto 368 | ij_typescript_use_path_mapping = always 369 | ij_typescript_use_public_modifier = false 370 | ij_typescript_use_semicolon_after_statement = true 371 | ij_typescript_var_declaration_wrap = normal 372 | ij_typescript_while_brace_force = never 373 | ij_typescript_while_on_new_line = false 374 | ij_typescript_wrap_comments = false 375 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: [nonara] 2 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Build (CI) 2 | 3 | on: [ push, pull_request ] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | 9 | strategy: 10 | matrix: 11 | node-version: [ 16.x, 18.x ] 12 | 13 | steps: 14 | - name: Checkout 15 | uses: actions/checkout@v2 16 | 17 | - name: Determine Yarn Cache Path 18 | id: yarn-cache-dir-path 19 | run: echo "::set-output name=dir::$(yarn cache dir)" 20 | 21 | - uses: actions/cache@v1 22 | id: yarn-cache # use this to check for `cache-hit` (`steps.yarn-cache.outputs.cache-hit != 'true'`) 23 | with: 24 | path: ${{ 
steps.yarn-cache-dir-path.outputs.dir }} 25 | key: ${{ runner.os }}-yarn-${{ hashFiles('**/yarn.lock') }} 26 | restore-keys: | 27 | ${{ runner.os }}-yarn- 28 | 29 | - name: Use Node.js ${{ matrix.node-version }} 30 | uses: actions/setup-node@v1 31 | with: 32 | node-version: ${{ matrix.node-version }} 33 | 34 | - name: Install Packages 35 | run: yarn install --frozen-lockfile 36 | 37 | - name: Build 38 | run: yarn build 39 | env: 40 | CI: true 41 | 42 | - name: Test (with coverage) 43 | run: yarn run test:coverage 44 | env: 45 | CI: true 46 | 47 | - name: Post coverage to Coveralls 48 | uses: coverallsapp/github-action@master 49 | with: 50 | github-token: ${{ secrets.GITHUB_TOKEN }} 51 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish 2 | 3 | on: 4 | push: 5 | tags: 6 | - v*.*.* 7 | 8 | jobs: 9 | publish: 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - name: Checkout 14 | uses: actions/checkout@v2 15 | 16 | - name: Setup Node.js to publish to npmjs.org 17 | uses: actions/setup-node@v1 18 | with: 19 | node-version: '18.x' 20 | registry-url: 'https://registry.npmjs.org' 21 | 22 | - name: Install Packages 23 | run: yarn install --frozen-lockfile 24 | 25 | - name: Build 26 | run: yarn build 27 | env: 28 | CI: true 29 | 30 | - name: Test 31 | run: yarn run test 32 | env: 33 | CI: true 34 | 35 | - name: Generate Release Body 36 | run: npx extract-changelog-release > RELEASE_BODY.md 37 | 38 | - name: Publish to NPM 39 | run: yarn publish --non-interactive 40 | env: 41 | NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} 42 | 43 | - name: Create GitHub Release 44 | uses: ncipollo/release-action@v1 45 | with: 46 | bodyFile: "RELEASE_BODY.md" 47 | token: ${{ secrets.GITHUB_TOKEN }} 48 | -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | # Built 2 | dist 3 | coverage 4 | package-lock.json 5 | *.tsbuildinfo 6 | 7 | # Extensions 8 | *.seed 9 | *.log 10 | *.csv 11 | *.dat 12 | *.out 13 | *.pid 14 | *.gz 15 | 16 | # Personal 17 | .env 18 | .vscode 19 | .idea/jsLibraryMappings.xml 20 | old 21 | TODO.md 22 | 23 | # Junk 24 | temp/ 25 | .DS_Store 26 | tmp 27 | node_modules 28 | 29 | ### JetBrains ### 30 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 31 | 32 | # N/A 33 | .idea/**/webServers.xml 34 | .idea/**/remote-mappings.xml 35 | .idea/**/deployment.xml 36 | 37 | # User-specific stuff 38 | .idea/**/workspace.xml 39 | .idea/**/tasks.xml 40 | .idea/**/usage.statistics.xml 41 | .idea/**/shelf 42 | 43 | # Generated files 44 | .idea/**/contentModel.xml 45 | 46 | # Sensitive or high-churn files 47 | .idea/**/dataSources/ 48 | .idea/**/dataSources.ids 49 | .idea/**/dataSources.local.xml 50 | .idea/**/sqlDataSources.xml 51 | .idea/**/dynamic.xml 52 | .idea/**/uiDesigner.xml 53 | .idea/**/dbnavigator.xml 54 | 55 | # Gradle 56 | .idea/**/gradle.xml 57 | .idea/**/libraries 58 | 59 | # Mongo Explorer plugin 60 | .idea/**/mongoSettings.xml 61 | 62 | # File-based project format 63 | *.iws 64 | 65 | # JIRA plugin 66 | atlassian-ide-plugin.xml 67 | 68 | # Cursive Clojure plugin 69 | .idea/replstate.xml 70 | 71 | # Crashlytics plugin (for Android Studio and IntelliJ) 72 | com_crashlytics_export_strings.xml 73 | crashlytics.properties 74 | crashlytics-build.properties 75 | fabric.properties 76 | 77 | # Editor-based Rest Client 78 | .idea/httpRequests 79 | -------------------------------------------------------------------------------- /.idea/.name: -------------------------------------------------------------------------------- 1 | node-html-markdown -------------------------------------------------------------------------------- /.idea/codeStyles/Project.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 64 | -------------------------------------------------------------------------------- /.idea/codeStyles/codeStyleConfig.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/Project_Default.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 9 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/node-html-markdown.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines. 4 | 5 | ## [1.3.0](https://github.com/crosstype/node-html-markdown/compare/v1.2.0...v1.3.0) (2022-12-13) 6 | 7 | 8 | ### Features 9 | 10 | * Added `useInlineLinks` option ([c318667](https://github.com/crosstype/node-html-markdown/commit/c318667334d979db3cf7bcd3600f9dd1618ba5b3)) 11 | * Added option to customize custom strike tag. 
([82cf1e3](https://github.com/crosstype/node-html-markdown/commit/82cf1e3c33662254983f35bb2000c62fc3d5870c)) 12 | 13 | 14 | ### Fixes 15 | 16 | * Can't get text from TextNode in the browser (fixes [#40](https://github.com/crosstype/node-html-markdown/issues/40)) ([feec660](https://github.com/crosstype/node-html-markdown/commit/feec6609cc4ad5c00ce81835d60c3a00036f82e7)) 17 | * perfStart and perfStop breaking code in the browser (fixes [#36](https://github.com/crosstype/node-html-markdown/issues/36) ([7d52c92](https://github.com/crosstype/node-html-markdown/commit/7d52c928b718acae578692736cda1f17d4733ab1)) 18 | 19 | ### [1.2.2](https://github.com/crosstype/node-html-markdown/compare/v1.2.0...v1.2.2) (2022-10-26) 20 | 21 | 22 | ### Fixes 23 | 24 | * Can't get text from TextNode in the browser (fixes [#40](https://github.com/crosstype/node-html-markdown/issues/40)) ([feec660](https://github.com/crosstype/node-html-markdown/commit/feec6609cc4ad5c00ce81835d60c3a00036f82e7)) 25 | * perfStart and perfStop breaking code in the browser (fixes [#36](https://github.com/crosstype/node-html-markdown/issues/36) ([7d52c92](https://github.com/crosstype/node-html-markdown/commit/7d52c928b718acae578692736cda1f17d4733ab1)) 26 | 27 | ## [1.2.0](https://github.com/crosstype/node-html-markdown/compare/v1.1.3...v1.2.0) (2022-04-20) 28 | 29 | 30 | ### Features 31 | 32 | * Added table support ([faaebe8](https://github.com/crosstype/node-html-markdown/commit/faaebe8b8b85a6e4680a286f24ff963db097cc66)) 33 | 34 | ### [1.1.3](https://github.com/crosstype/node-html-markdown/compare/v1.1.2...v1.1.3) (2021-10-24) 35 | 36 | 37 | ### Fixes 38 | 39 | * Percent-encode Markdown reserved symbols in URLs ([#26](https://github.com/crosstype/node-html-markdown/issues/26)) ([83d4fff](https://github.com/crosstype/node-html-markdown/commit/83d4fff866eb027ebba052ab45996f679412c42b)) 40 | 41 | ### [1.1.2](https://github.com/crosstype/node-html-markdown/compare/v1.1.1...v1.1.2) (2021-09-18) 42 | 43 | 44 | ### 
Fixes 45 | 46 | * **upstream:** Parser dependency parses invalid nested A improperly (closes [#25](https://github.com/crosstype/node-html-markdown/issues/25)) ([bdc7caa](https://github.com/crosstype/node-html-markdown/commit/bdc7caaac615428c89729f30b23fa2a29d9a6c56)) 47 | * Correct nesting and other possible issues in child nodes of A tag (closes [#25](https://github.com/crosstype/node-html-markdown/issues/25)) ([dcd6b20](https://github.com/crosstype/node-html-markdown/commit/dcd6b209f630335c314d67e47fd2290218bd6e79)) 48 | 49 | ### [1.1.1](https://github.com/crosstype/node-html-markdown/compare/v1.1.0...v1.1.1) (2021-08-03) 50 | 51 | 52 | ### Fixes 53 | 54 | * Preformatted code block node contents getting trimmed (fixes [#20](https://github.com/crosstype/node-html-markdown/issues/20)) ([af79995](https://github.com/crosstype/node-html-markdown/commit/af799956d94d7a06c50df71746bcfac8f31e342e)) 55 | 56 | ## [1.1.0](https://github.com/crosstype/node-html-markdown/compare/v1.0.1...v1.1.0) (2021-07-26) 57 | 58 | 59 | ### Features 60 | 61 | * Added `preserveIfEmpty` translator option to allow triggering translators that do not have children (closes [#19](https://github.com/crosstype/node-html-markdown/issues/19)) ([c3d8829](https://github.com/crosstype/node-html-markdown/commit/c3d88296de6c51b016524406021718218d0c412b)) 62 | 63 | 64 | ### Fixes 65 | 66 | * Codeblocks apply markdown formatting to contents (fixes [#22](https://github.com/crosstype/node-html-markdown/issues/22)) ([040f81e](https://github.com/crosstype/node-html-markdown/commit/040f81edde8ec21e393a2b38273f9427751bfad2)) 67 | * Whitespace not always properly handled (fixes [#20](https://github.com/crosstype/node-html-markdown/issues/20) [#21](https://github.com/crosstype/node-html-markdown/issues/21)) ([8c43a22](https://github.com/crosstype/node-html-markdown/commit/8c43a22c3da5a5f5134fc52e08015b476cfbbb45)) 68 | 69 | ### [1.0.1](https://github.com/crosstype/node-html-markdown/compare/v1.0.0...v1.0.1) 
(2021-07-13) 70 | 71 | 72 | ### Fixes 73 | 74 | * Whitespace between some nodes not properly handled (fixes [#16](https://github.com/crosstype/node-html-markdown/issues/16)) ([a7abf81](https://github.com/crosstype/node-html-markdown/commit/a7abf81add691e199587ca85600fb0f4c6876a01)) 75 | 76 | ## [1.0.0](https://github.com/crosstype/node-html-markdown/compare/v0.1.7...v1.0.0) (2021-07-11) 77 | 78 | 79 | ### Features 80 | 81 | * Add useLinkReferenceDefinitions option (closes [#15](https://github.com/crosstype/node-html-markdown/issues/15)) ([a7caef1](https://github.com/crosstype/node-html-markdown/commit/a7caef106a37a5de618e7072ed4e329a1c4c4f95)) 82 | * Improved spacing in and around bold, italic, and strikethrough tags ([8198524](https://github.com/crosstype/node-html-markdown/commit/8198524680ec3e6e5d8578b18fe58067158774bb)) 83 | * Prefer links when possible (closes [#17](https://github.com/crosstype/node-html-markdown/issues/17)) ([613e8bb](https://github.com/crosstype/node-html-markdown/commit/613e8bb5d39ca84efcce13c33c1fda8206a9d924)) 84 | 85 | 86 | ### Fixes 87 | 88 | * Ensure html entities are decoded (Fixes [#14](https://github.com/crosstype/node-html-markdown/issues/14)) ([1e59887](https://github.com/crosstype/node-html-markdown/commit/1e59887ea9baea37d72d977943cfda936e925924)) 89 | * Nested text formatting tags can break formatting (fixes [#18](https://github.com/crosstype/node-html-markdown/issues/18)) ([7640e33](https://github.com/crosstype/node-html-markdown/commit/7640e334936e6cd678cc1ea960b77135832afd55)) 90 | 91 | ### [0.1.7](https://github.com/crosstype/node-html-markdown/compare/v0.1.6...v0.1.7) (2021-06-11) 92 | 93 | 94 | ### Fixes 95 | 96 | * Allow for leading/trailing spaces in node content (fixes [#9](https://github.com/crosstype/node-html-markdown/issues/9)) ([ff61746](https://github.com/crosstype/node-html-markdown/commit/ff617463d9a0c18f5c58f31feda0c06a69a34d27)) 97 | 98 | ### 
[0.1.6](https://github.com/crosstype/node-html-markdown/compare/v0.1.5...v0.1.6) (2021-03-28) 99 | 100 | 101 | ### Fixes 102 | 103 | * **tsconfig:** Set output target to es2017 to support Node v10+ (Fixes [#7](https://github.com/crosstype/node-html-markdown/issues/7)) ([#8](https://github.com/crosstype/node-html-markdown/issues/8)) ([dd63205](https://github.com/crosstype/node-html-markdown/commit/dd63205a5019ab84ac5010cf0e2f06cbc5ffabb2)) 104 | 105 | ### [0.1.5](https://github.com/crosstype/node-html-markdown/compare/v0.1.4...v0.1.5) (2021-01-03) 106 | 107 | ### [0.1.3](https://github.com/crosstype/node-html-markdown/compare/v0.1.2...v0.1.3) (2020-11-28) 108 | 109 | 110 | ### Fixes 111 | 112 | * Fixed performance bottleneck (doubled speed) ([6d59c27](https://github.com/crosstype/node-html-markdown/commit/6d59c275f5f812d998ad36c09aeafa84191ed0a9)) 113 | 114 | ### [0.1.2](https://github.com/crosstype/node-html-markdown/compare/v0.1.1...v0.1.2) (2020-11-28) 115 | 116 | 117 | ### Fixes 118 | 119 | * Fixed wrong install instructions in readme ([e76df44](https://github.com/crosstype/node-html-markdown/commit/e76df44d3244888238a8962b5559e3a19a53675b)) 120 | 121 | ### [0.1.1](https://github.com/crosstype/node-html-markdown/compare/v0.0.3...v0.1.1) (2020-11-28) 122 | 123 | 124 | ### Fixes 125 | 126 | * Fixed broken benchmark file speed stats ([d12b702](https://github.com/crosstype/node-html-markdown/commit/d12b702274a2872d38d2b53269929002fc3924b7)) 127 | 128 | 129 | ## [0.1.0](https://github.com/crosstype/node-html-markdown/compare/v0.0.3...v0.1.0) (2020-11-28) 130 | 131 | 132 | ### Features 133 | 134 | * Added performance enhancements + improved benchmark display ([4777441](https://github.com/crosstype/node-html-markdown/commit/477744167d4e1ffce8c7bcbfbc34b5cd88aabf74)) 135 | 136 | 137 | ## [0.0.5](https://github.com/crosstype/node-html-markdown/v0.0.0...v0.0.5) - 2020-11-27 138 | 139 | - Released initial version 140 | 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![npm version](https://badge.fury.io/js/node-html-markdown.svg)](https://badge.fury.io/js/ts-patch) 2 | [![NPM Downloads](https://img.shields.io/npm/dm/node-html-markdown.svg?style=flat)](https://npmjs.org/package/node-html-markdown) 3 | ![Build Status](https://github.com/crosstype/node-html-markdown/workflows/Build%20(CI)/badge.svg) 4 | [![Coverage Status](https://coveralls.io/repos/github/crosstype/node-html-markdown/badge.svg?branch=master)](https://coveralls.io/github/crosstype/node-html-markdown?branch=master) 5 | 6 | # node-html-markdown 7 | 8 | NHM is a _fast_ HTML to markdown converter, compatible with both node and the browser. 9 | 10 | It was built with the following two goals in mind: 11 | 12 | ### 1. Speed 13 | 14 | We had a need to convert gigabytes of HTML daily very quickly. All libraries we found were too slow with node. 15 | We considered using a low-level language but decided to attempt to write something that would squeeze every bit 16 | of performance out of the JIT that we could. The end result was fast enough to make the cut! 17 | 18 | ### 2. Human Readability 19 | 20 | The other libraries we tested produced output that would break in numerous conditions and produced output with many 21 | repeating linefeeds, etc. Generally speaking, outside of a markdown viewer, the result was not easy to read. 22 | 23 | We took the approach of producing a _clean, concise_ result with consistent spacing rules. 
24 | 25 | ## Install 26 | 27 | ```sh 28 | add node-html-markdown 29 | ``` 30 | 31 | ## Benchmarks 32 | ``` 33 | ----------------------------------------------------------------------------- 34 | 35 | Estimated processing times (fastest to slowest): 36 | 37 | [node-html-markdown (reused instance)] 38 | 100 kB: 17ms 39 | 1 MB: 176ms 40 | 50 MB: 8.80sec 41 | 1 GB: 3min, 0sec 42 | 50 GB: 2hr, 30min, 14sec 43 | 44 | [turndown (reused instance)] 45 | 100 kB: 27ms 46 | 1 MB: 280ms 47 | 50 MB: 13.98sec 48 | 1 GB: 4min, 46sec 49 | 50 GB: 3hr, 58min, 35sec 50 | 51 | ----------------------------------------------------------------------------- 52 | 53 | Speed comparison - node-html-markdown (reused instance) is: 54 | 55 | 1.02 times as fast as node-html-markdown 56 | 1.57 times as fast as turndown 57 | 1.59 times as fast as turndown (reused instance) 58 | 59 | ----------------------------------------------------------------------------- 60 | ``` 61 | 62 | ## Usage 63 | 64 | ```ts 65 | import { NodeHtmlMarkdown, NodeHtmlMarkdownOptions } from 'node-html-markdown' 66 | 67 | 68 | /* ********************************************************* * 69 | * Single use 70 | * If using it once, you can use the static method 71 | * ********************************************************* */ 72 | 73 | // Single file 74 | NodeHtmlMarkdown.translate( 75 | /* html */ `hello`, 76 | /* options (optional) */ {}, 77 | /* customTranslators (optional) */ undefined, 78 | /* customCodeBlockTranslators (optional) */ undefined 79 | ); 80 | 81 | // Multiple files 82 | NodeHtmlMarkdown.translate( 83 | /* FileCollection */ { 84 | 'file1.html': `hello`, 85 | 'file2.html': `goodbye` 86 | }, 87 | /* options (optional) */ {}, 88 | /* customTranslators (optional) */ undefined, 89 | /* customCodeBlockTranslators (optional) */ undefined 90 | ); 91 | 92 | 93 | /* ********************************************************* * 94 | * Re-use 95 | * If using it several times, creating an instance saves time 96 | * 
********************************************************* */ 97 | 98 | const nhm = new NodeHtmlMarkdown( 99 | /* options (optional) */ {}, 100 | /* customTransformers (optional) */ undefined, 101 | /* customCodeBlockTranslators (optional) */ undefined 102 | ); 103 | 104 | // Single file 105 | nhm.translate(/* html */ `hello`); 106 | 107 | // Multiple Files 108 | nhm.translate( 109 | /* FileCollection */ { 110 | 'file1.html': `hello`, 111 | 'file2.html': `goodbye` 112 | }, 113 | ); 114 | ``` 115 | 116 | ## Options 117 | 118 | ```ts 119 | 120 | export interface NodeHtmlMarkdownOptions { 121 | /** 122 | * Use native window DOMParser when available 123 | * @default false 124 | */ 125 | preferNativeParser: boolean, 126 | 127 | /** 128 | * Code block fence 129 | * @default ``` 130 | */ 131 | codeFence: string, 132 | 133 | /** 134 | * Bullet marker 135 | * @default * 136 | */ 137 | bulletMarker: string, 138 | 139 | /** 140 | * Style for code block 141 | * @default fence 142 | */ 143 | codeBlockStyle: 'indented' | 'fenced', 144 | 145 | /** 146 | * Emphasis delimiter 147 | * @default _ 148 | */ 149 | emDelimiter: string, 150 | 151 | /** 152 | * Strong delimiter 153 | * @default ** 154 | */ 155 | strongDelimiter: string, 156 | 157 | /** 158 | * Strong delimiter 159 | * @default ~~ 160 | */ 161 | strikeDelimiter: string, 162 | 163 | /** 164 | * Supplied elements will be ignored (ignores inner text does not parse children) 165 | */ 166 | ignore?: string[], 167 | 168 | /** 169 | * Supplied elements will be treated as blocks (surrounded with blank lines) 170 | */ 171 | blockElements?: string[], 172 | 173 | /** 174 | * Max consecutive new lines allowed 175 | * @default 3 176 | */ 177 | maxConsecutiveNewlines: number, 178 | 179 | /** 180 | * Line Start Escape pattern 181 | * (Note: Setting this will override the default escape settings, you might want to use textReplace option instead) 182 | */ 183 | lineStartEscape: [ pattern: RegExp, replacement: string ] 184 | 185 | /** 186 | * 
Global escape pattern 187 | * (Note: Setting this will override the default escape settings, you might want to use textReplace option instead) 188 | */ 189 | globalEscape: [ pattern: RegExp, replacement: string ] 190 | 191 | /** 192 | * User-defined text replacement pattern (Replaces matching text retrieved from nodes) 193 | */ 194 | textReplace?: [ pattern: RegExp, replacement: string ][] 195 | 196 | /** 197 | * Keep images with data: URI (Note: These can be up to 1MB each) 198 | * @example 199 | * 200 | * @default false 201 | */ 202 | keepDataImages?: boolean 203 | 204 | /** 205 | * Place URLS at the bottom and format links using link reference definitions 206 | * 207 | * @example 208 | * Click here. Or here. Or this link. 209 | * 210 | * Becomes: 211 | * Click [here][1]. Or [here][2]. Or [this link][1]. 212 | * 213 | * [1]: /url 214 | * [2]: /url2 215 | */ 216 | useLinkReferenceDefinitions?: boolean 217 | 218 | /** 219 | * Wrap URL text in < > instead of []() syntax. 220 | * 221 | * @example 222 | * The input https://google.com 223 | * becomes 224 | * instead of [https://google.com](https://google.com) 225 | * 226 | * @default true 227 | */ 228 | useInlineLinks?: boolean 229 | } 230 | ``` 231 | 232 | ## Custom Translators 233 | 234 | Custom translators are an advanced option to allow handling certain elements a specific way. 235 | 236 | These can be modified via the `NodeHtmlMarkdown#translators` property, or added during creation. 237 | 238 | __For detail on how to use them see__: 239 | 240 | - [translator.ts](https://github.com/crosstype/node-html-markdown/blob/master/src/translator.ts) - Documentation for `TranslatorConfig` 241 | - [config.ts](https://github.com/crosstype/node-html-markdown/blob/master/src/config.ts) - Translators in `defaultTranslators` 242 | 243 | The `NodeHtmlMarkdown#codeBlockTranslators` property is a collection of translators which handles elements within a `
<pre><code>` block.
244 | 
245 | ## Further improvements
246 | 
247 | Being a performance-centric library, we're always interested in further improvements. 
248 | There are several probable routes by which we could gain substantial performance increases over the current model. 
249 | 
250 | Such methods include:
251 | 
252 | - Writing a custom parser
253 | - Integrating an async worker-thread based model for multi-threading
254 | - Fully replacing any remaining regex
255 |   
256 | These would be fun to implement; however, for the time being, the present library is fast enough for my purposes. That
257 | said, I welcome discussion and any PR toward the effort of further improving performance, and I may ultimately do more
258 | work in that capacity in the future!
259 | 
260 | ## Help Wanted!
261 | 
262 | Looking to contribute? Check out our [help wanted] list for a good place to start!
263 | 
264 | 
265 | [help wanted]: https://github.com/crosstype/node-html-markdown/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22
266 | 


--------------------------------------------------------------------------------
/benchmark/LICENSE.md:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2013 Andreas Madsen
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy
 4 | of this software and associated documentation files (the "Software"), to deal
 5 | in the Software without restriction, including without limitation the rights
 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 7 | copies of the Software, and to permit persons to whom the Software is
 8 | furnished to do so, subject to the following conditions:
 9 | 
10 | The above copyright notice and this permission notice shall be included in
11 | all copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | THE SOFTWARE.


--------------------------------------------------------------------------------
/benchmark/README.md:
--------------------------------------------------------------------------------
 1 | # Benchmark Tool
 2 | 
 3 | A simple benchmark that compares HTML-to-Markdown converters using real-world data.
 4 | 
 5 | Based on: https://github.com/AndreasMadsen/htmlparser-benchmark
 6 | 
 7 | ## Usage
 8 | 
 9 | ```shell
10 | yarn run benchmark
11 | ```
12 | 


--------------------------------------------------------------------------------
/benchmark/_run.js:
--------------------------------------------------------------------------------
 1 | const Benchmark = require('./index.js');
 2 | const ProgressBar = require('progress');
 3 | 
 4 | 
 5 | /* ****************************************************************************************************************** *
 6 |  * Handlers
 7 |  * ****************************************************************************************************************** */
 8 | 
 9 | process.on('uncaughtException', function(e){
10 |   console.error(e);
11 | 	process.exit(1);
12 | });
13 | 
// Runs one benchmark per incoming IPC message.
// `item.parser` is the module path of the wrapper to benchmark; the resulting
// statistics are sent back over IPC and the process then exits with code 0.
process.on('message', (item) => {
  const progressBar = new ProgressBar('[:bar] :current / :total', {
    total: Benchmark.TOTAL,
    complete: '=',
    incomplete: ' ',
    width: 50,
    clear: true
  });

  const wrapper = require(item.parser);
  const benchmark = new Benchmark(wrapper);

  // Advance the progress bar once per 'progress' event
  benchmark.on('progress', () => {
    progressBar.tick();
  });

  benchmark.once('result', (stat) => {
    const mean = stat.mean();
    const sd = stat.sd();

    process.send({
      mean,
      sd,
      totalFiles: Benchmark.TOTAL,
      avgFileSize: Benchmark.AVG_FILE_SIZE,
      avgBytesPerMs: Benchmark.AVG_FILE_SIZE / mean
    });
    process.exit(0);
  });
});
40 | 


--------------------------------------------------------------------------------
/benchmark/execute.js:
--------------------------------------------------------------------------------
  1 | const fs = require('fs');
  2 | const path = require('path');
  3 | const async = require('async');
  4 | const { fork } = require('child_process');
  5 | 
  6 | 
  7 | /* ****************************************************************************************************************** */
  8 | // region: Config / Const
  9 | /* ****************************************************************************************************************** */
 10 | 
 11 | const quickMode = process.argv[2] === 'quick'
 12 | 
 13 | const wrappers = fs
 14 |   .readdirSync(path.join(__dirname, 'wrapper'))
 15 |   .sort((a, b) => a.localeCompare(b))
 16 |   .map(filename => ({
 17 |       name: path.basename(filename, '.js').replace('_reuse', ' (reused instance)'),
 18 |       parser: path.join(__dirname, 'wrapper', filename)
 19 |     })
 20 |   );
 21 | 
 22 | const MAX_WIDTH = Math.max(...wrappers.map(wrapper => wrapper.name.length));
 23 | 
 24 | const SEPARATOR = '\n' + '-'.repeat(MAX_WIDTH + 41) + '\n';
 25 | 
 26 | // endregion
 27 | 
 28 | 
 29 | /* ****************************************************************************************************************** */
 30 | // region: Helpers
 31 | /* ****************************************************************************************************************** */
 32 | 
 33 | function formatName(name) {
 34 |   const left = MAX_WIDTH - name.length;
 35 |   let str = name;
 36 |   for (let i = 0; i < left; i++) str += ' ';
 37 |   return str;
 38 | }
 39 | 
 40 | function humanFileSize(size) {
 41 |   const i = Math.floor( Math.log(size) / Math.log(1024) );
 42 |   return ( size / Math.pow(1024, i) ).toFixed(2) * 1 + ' ' + ['B', 'kB', 'MB', 'GB', 'TB'][i];
 43 | }
 44 | 
 45 | /**
 46 |  * Turn seconds into written time form
 47 |  */
 48 | function humanTime(seconds) {
 49 |   let s = seconds;
 50 |   const hours = Math.floor(s / 3600);
 51 |   s -= (hours * 3600);
 52 |   const minutes = Math.floor(s / 60);
 53 |   s -= (minutes * 60);
 54 | 
 55 |   for (const n of [ hours, minutes, s ]) if (!isFinite(n) || isNaN(n)) return 'N/A';
 56 | 
 57 |   return (!hours && !minutes && seconds < 1) ? `${Math.round((s % 1) * 1000)}ms` :
 58 |          (!hours && !minutes) ? `${s.toFixed(2)}sec` :
 59 |          `${hours ? hours + 'hr, ' : ''}${minutes ? minutes + 'min, ' : ''}${Math.round(s)}sec`;
 60 | }
 61 | 
 62 | // endregion
 63 | 
 64 | 
 65 | /* ****************************************************************************************************************** */
 66 | // region: Implementation
 67 | /* ****************************************************************************************************************** */
 68 | 
 69 | (function run() {
 70 |   if (!quickMode) console.log('NOTE: Large mode is generally less reliable in most environments!');
 71 |   const stats = [];
 72 | 
 73 |   console.log(SEPARATOR);
 74 | 
 75 |   async.eachSeries(
 76 |     wrappers,
 77 |     function (item, done) {
 78 |       const runner = fork(path.join(__dirname, '_run.js'), void 0, { env: { QUICK_MODE: quickMode, LOG_PERF: true }});
 79 |       runner.send(item);
 80 |       runner.on('message', function (stat) {
 81 |         const name = formatName(item.name);
 82 |         const mean = stat.mean.toPrecision(6);
 83 |         const sd = stat.sd.toPrecision(6);
 84 |         const avgBytesPerSec = (stat.avgBytesPerMs * 1000);
 85 | 
 86 |         stats.push({ name, ...stat });
 87 |         console.log(`${name}: ${mean} ms/file ± ${sd} (${humanFileSize(avgBytesPerSec)}/s)`);
 88 |       });
 89 | 
 90 |       runner.on('close', function (n) {
 91 |         if (n) console.log('%s failed (exit code %d)', item.name, n);
 92 |         done();
 93 |       });
 94 |     },
 95 |     function () {
 96 |       console.log(SEPARATOR);
 97 |       console.log(
 98 |         `Total Files: ${stats[0].totalFiles}\n`+
 99 |         `Avg. file size: ${humanFileSize(stats[0].avgFileSize)}`
100 |       );
101 | 
102 |       /* Get speed estimates */
103 |       console.log(SEPARATOR);
104 |       console.log(`Estimated processing times (fastest to slowest):`);
105 |       const sortedStats = [ ...stats ].sort((a,b) => b.avgBytesPerMs - a.avgBytesPerMs)
106 |       sortedStats.forEach(({ name, avgBytesPerMs }) => {
107 |         console.log(`\n  [${name.trim()}]`);
108 |         [ 100, 1024, 51200, 1048576, 52428800 ].map(kbSize => {
109 |           const byteSize = kbSize * 1024;
110 |           const secToComplete = ((byteSize / avgBytesPerMs) / 1000);
111 |           const tag = humanFileSize(byteSize);
112 |           const spacing = 8 - tag.length;
113 |           console.log(`    ${tag}:${' '.repeat(spacing)}${humanTime(secToComplete)}`);
114 |         }).join('\n')
115 |       });
116 | 
117 |       /* Get comparisons */
118 |       console.log(SEPARATOR);
119 |       console.log(`Speed comparison - ${sortedStats[0].name.trim()} is: \n`);
120 |       const fastestMean = sortedStats[0].mean;
121 |       sortedStats.slice(1).forEach(({ name, mean }) =>
122 |         console.log(`  ${((mean / fastestMean)).toFixed(2)} times as fast as ${name.trim()}`)
123 |       );
124 | 
125 |       console.log(SEPARATOR);
126 |     }
127 |   );
128 | })();
129 | 
130 | // endregion
131 | 


--------------------------------------------------------------------------------
/benchmark/files/5f8b89390d3fc01c6a80728ba2aee597fea1dbfc8399d61015956db71e5336c7.html:
--------------------------------------------------------------------------------
 1 | FDA OKs radiation-based prostate cancer drug
Join the Nation's Conversation

To find out more about Facebook commenting please read the Conversation Guidelines and FAQs

Share This Story!

Let friends in your social network know what you are reading about

FDA OKs radiation-based prostate cancer drug

WASHINGTON (AP) — The U.S. Food and Drug Administration has approved a new injectable drug that uses radiation to treat advanced prostate cancer that has spread to the bones.

The FDA said Wednesday it approved the drug, Xofigo from Bayer Pharmaceuticals, for men whose cancer has grown into bone tumors even after receiving medication or surgery to lower testosterone. The hormone spurs growth of prostate tumors.

Regulators approved Xofigo based on a study of 809 men with advanced prostate cancer who received the drug or placebo. Patients taking Xofigo typically lived 14 months compared to 11.2 months for those taking placebo.

Xofigo's side effects include nausea and diarrhea.

Copyright 2013 The Associated Press. All rights reserved. This material may not be published, broadcast, rewritten or redistributed.

50 | -------------------------------------------------------------------------------- /benchmark/files/8bd6d9bcba689408767f770d69f12b59c3f092e73cffcc9332261fbab4aa16e1.html: -------------------------------------------------------------------------------- 1 | Tech stocks: Google, Microsoft to report earnings
22 | 35 23 |
Share This Story!

Let friends in your social network know what you are reading about

Tech stocks: Google, Microsoft to report earnings

It's time for a pair of tech heavyweights to report quarterly earnings after the markets close Thursday. Let's look at the technology stocks to watch. Google shares up slightly. With its stock price slowly

Posted!

A link has been posted to your Facebook feed.

Sent!

A link has been sent to your friend's email address.

Join the Nation's Conversation

To find out more about Facebook commenting please read the Conversation Guidelines and FAQs

Tech stocks: Google, Microsoft to report earnings

SHARE 106 | 35 107 | COMMENTMORE

It's time for a pair of tech heavyweights to report quarterly earnings after the markets close Thursday. Let's look at the technology stocks to watch.

Google shares up slightly. With its stock price slowly marching toward the $1,000 mark, the tech titan reports second-quarter earnings after the bell.

Analysts expect Google to report an earnings per share of $10.78 with revenue of just over $14 billion.

Shares of Google have surged in the past 12 months, adding more than $300 in value since hitting a 52-week low of $580.76 on this day a year ago.

Microsoft to report earnings. Shares of the Redmond, Wash., company are barely up in pre-market trading as the company reports quarterly earnings after unveiling a massive restructuring.

Last week, Microsoft CEO Steve Ballmer announced a major reorganization of the company he says will help them become more efficient.

The company has also made news related to its product line. Last week, the company slashed prices on its Surface RT tablets by $150. In May, the company revealed the Xbox One, its video game console that will succeed the Xbox 360. However, reception to the console has been mixed, primarily due to restrictions to software that have since been removed.

Follow Brett Molina on Twitter: @bam923.

151 | -------------------------------------------------------------------------------- /benchmark/index.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs'); 2 | const path = require('path'); 3 | const util = require('util'); 4 | const events = require('events'); 5 | const async = require('async'); 6 | const summary = require('summary'); 7 | 8 | 9 | /* ****************************************************************************************************************** */ 10 | // region: Load files 11 | /* ****************************************************************************************************************** */ 12 | 13 | const fileNames = fs.readdirSync(path.resolve(__dirname, 'files')); 14 | const FILES = []; 15 | for (let i = 0; i < fileNames.length; i++) { 16 | if (process.env.QUICK_MODE === 'true' && i >= 25) break; 17 | 18 | const fileName = fileNames[i]; 19 | const filePath = path.resolve(__dirname, 'files', fileName); 20 | FILES.push({ 21 | key: path.basename(fileName, '.html'), 22 | file: filePath, 23 | fileSize: fs.statSync(filePath).size 24 | }); 25 | } 26 | 27 | // endregion 28 | 29 | 30 | /* ****************************************************************************************************************** */ 31 | // region: Benchmark 32 | /* ****************************************************************************************************************** */ 33 | 34 | function Benchmark(parser) { 35 | if (!(this instanceof Benchmark)) return new Benchmark(parser); 36 | 37 | this._parser = parser; 38 | async.mapSeries(FILES, this._file.bind(this), this._done.bind(this)); 39 | } 40 | 41 | // The total amount of files 42 | Benchmark.TOTAL = FILES.length; 43 | 44 | // Average file size 45 | Benchmark.AVG_FILE_SIZE = Math.round(FILES.reduce((acc, { fileSize }) => acc + fileSize, 0) / FILES.length); 46 | 47 | // Parse a file 48 | Benchmark.prototype._file = function (item, done) { 49 | const 
self = this; 50 | 51 | fs.readFile(item.file, 'utf8', function (err, html) { 52 | if (err) return done(err); 53 | 54 | const tic = process.hrtime(); 55 | self._parser(html, function (err) { 56 | const toc = process.hrtime(tic); 57 | 58 | if (err) { 59 | done(err, toc); 60 | } else { 61 | self.emit('progress', item.key); 62 | done(null, toc); 63 | } 64 | }); 65 | }); 66 | }; 67 | 68 | // Benchmark for this parser is done 69 | Benchmark.prototype._done = function (err, times) { 70 | if (err) return this.emit('error', err); 71 | 72 | const stat = summary(times.map(function (time) { 73 | return time[0] * 1e3 + time[1] / 1e6; 74 | })); 75 | 76 | this.emit('result', stat); 77 | }; 78 | 79 | util.inherits(Benchmark, events.EventEmitter); 80 | 81 | // endregion 82 | 83 | 84 | /* ****************************************************************************************************************** * 85 | * Exports 86 | * ****************************************************************************************************************** */ 87 | 88 | module.exports = Benchmark; 89 | -------------------------------------------------------------------------------- /benchmark/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "benchmark-tool", 3 | "private": true, 4 | "description": "Simple benchmark tool for JS html to markdown compilers", 5 | "main": "./index.js", 6 | "scripts": { 7 | "benchmark": "node execute.js", 8 | "benchmark:quick": "node execute.js quick" 9 | }, 10 | "bin": { 11 | "htmltomarkdown-benchmark": "./execute.js" 12 | }, 13 | "dependencies": { 14 | "async": "^3.2.3", 15 | "node-html-markdown": "link:../", 16 | "summary": "^2.1.0", 17 | "turndown": "^7.1.1" 18 | }, 19 | "devDependencies": { 20 | "progress": "^2.0.3" 21 | }, 22 | "license": "MIT", 23 | "engines": { 24 | "node": "0.10 || 0.11" 25 | } 26 | } 27 | -------------------------------------------------------------------------------- 
/benchmark/wrapper/node-html-markdown.js: -------------------------------------------------------------------------------- 1 | const { NodeHtmlMarkdown } = require('node-html-markdown'); 2 | 3 | module.exports = function (html, callback) { 4 | NodeHtmlMarkdown.translate(html); 5 | callback(null); 6 | }; 7 | -------------------------------------------------------------------------------- /benchmark/wrapper/node-html-markdown_reuse.js: -------------------------------------------------------------------------------- 1 | const { NodeHtmlMarkdown } = require('node-html-markdown'); 2 | const nhm = new NodeHtmlMarkdown(); 3 | 4 | module.exports = function (html, callback) { 5 | nhm.translate(html); 6 | callback(null); 7 | }; 8 | -------------------------------------------------------------------------------- /benchmark/wrapper/turndown.js: -------------------------------------------------------------------------------- 1 | const TurndownService = require('turndown'); 2 | 3 | module.exports = function (html, callback) { 4 | (new TurndownService()).turndown(html); 5 | callback(null); 6 | }; 7 | -------------------------------------------------------------------------------- /benchmark/wrapper/turndown_reuse.js: -------------------------------------------------------------------------------- 1 | const TurndownService = require('turndown'); 2 | const td = new TurndownService(); 3 | 4 | module.exports = function (html, callback) { 5 | td.turndown(html); 6 | callback(null); 7 | }; 8 | -------------------------------------------------------------------------------- /benchmark/yarn.lock: -------------------------------------------------------------------------------- 1 | # THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY. 
2 | # yarn lockfile v1 3 | 4 | 5 | async@^3.2.3: 6 | version "3.2.3" 7 | resolved "https://registry.yarnpkg.com/async/-/async-3.2.3.tgz#ac53dafd3f4720ee9e8a160628f18ea91df196c9" 8 | integrity sha512-spZRyzKL5l5BZQrr/6m/SqFdBN0q3OCI0f9rjfBzCMBIP4p75P620rR3gTmaksNOhmzgdxcaxdNfMy6anrbM0g== 9 | 10 | boolbase@^1.0.0: 11 | version "1.0.0" 12 | resolved "https://registry.yarnpkg.com/boolbase/-/boolbase-1.0.0.tgz#68dff5fbe60c51eb37725ea9e3ed310dcc1e776e" 13 | integrity sha1-aN/1++YMUes3cl6p4+0xDcwed24= 14 | 15 | css-select@^4.2.1: 16 | version "4.3.0" 17 | resolved "https://registry.yarnpkg.com/css-select/-/css-select-4.3.0.tgz#db7129b2846662fd8628cfc496abb2b59e41529b" 18 | integrity sha512-wPpOYtnsVontu2mODhA19JrqWxNsfdatRKd64kmpRbQgh1KtItko5sTnEpPdpSaJszTOhEMlF/RPz28qj4HqhQ== 19 | dependencies: 20 | boolbase "^1.0.0" 21 | css-what "^6.0.1" 22 | domhandler "^4.3.1" 23 | domutils "^2.8.0" 24 | nth-check "^2.0.1" 25 | 26 | css-what@^6.0.1: 27 | version "6.1.0" 28 | resolved "https://registry.yarnpkg.com/css-what/-/css-what-6.1.0.tgz#fb5effcf76f1ddea2c81bdfaa4de44e79bac70f4" 29 | integrity sha512-HTUrgRJ7r4dsZKU6GjmpfRK1O76h97Z8MfS1G0FozR+oF2kG6Vfe8JE6zwrkbxigziPHinCJ+gCPjA9EaBDtRw== 30 | 31 | dom-serializer@^1.0.1: 32 | version "1.3.2" 33 | resolved "https://registry.yarnpkg.com/dom-serializer/-/dom-serializer-1.3.2.tgz#6206437d32ceefaec7161803230c7a20bc1b4d91" 34 | integrity sha512-5c54Bk5Dw4qAxNOI1pFEizPSjVsx5+bpJKmL2kPn8JhBUq2q09tTCa3mjijun2NfK78NMouDYNMBkOrPZiS+ig== 35 | dependencies: 36 | domelementtype "^2.0.1" 37 | domhandler "^4.2.0" 38 | entities "^2.0.0" 39 | 40 | domelementtype@^2.0.1, domelementtype@^2.2.0: 41 | version "2.2.0" 42 | resolved "https://registry.yarnpkg.com/domelementtype/-/domelementtype-2.2.0.tgz#9a0b6c2782ed6a1c7323d42267183df9bd8b1d57" 43 | integrity sha512-DtBMo82pv1dFtUmHyr48beiuq792Sxohr+8Hm9zoxklYPfa6n0Z3Byjj2IV7bmr2IyqClnqEQhfgHJJ5QF0R5A== 44 | 45 | domhandler@^4.2.0: 46 | version "4.2.0" 47 | resolved 
"https://registry.yarnpkg.com/domhandler/-/domhandler-4.2.0.tgz#f9768a5f034be60a89a27c2e4d0f74eba0d8b059" 48 | integrity sha512-zk7sgt970kzPks2Bf+dwT/PLzghLnsivb9CcxkvR8Mzr66Olr0Ofd8neSbglHJHaHa2MadfoSdNlKYAaafmWfA== 49 | dependencies: 50 | domelementtype "^2.2.0" 51 | 52 | domhandler@^4.3.1: 53 | version "4.3.1" 54 | resolved "https://registry.yarnpkg.com/domhandler/-/domhandler-4.3.1.tgz#8d792033416f59d68bc03a5aa7b018c1ca89279c" 55 | integrity sha512-GrwoxYN+uWlzO8uhUXRl0P+kHE4GtVPfYzVLcUxPL7KNdHKj66vvlhiweIHqYYXWlw+T8iLMp42Lm67ghw4WMQ== 56 | dependencies: 57 | domelementtype "^2.2.0" 58 | 59 | domino@^2.1.6: 60 | version "2.1.6" 61 | resolved "https://registry.yarnpkg.com/domino/-/domino-2.1.6.tgz#fe4ace4310526e5e7b9d12c7de01b7f485a57ffe" 62 | integrity sha512-3VdM/SXBZX2omc9JF9nOPCtDaYQ67BGp5CoLpIQlO2KCAPETs8TcDHacF26jXadGbvUteZzRTeos2fhID5+ucQ== 63 | 64 | domutils@^2.8.0: 65 | version "2.8.0" 66 | resolved "https://registry.yarnpkg.com/domutils/-/domutils-2.8.0.tgz#4437def5db6e2d1f5d6ee859bd95ca7d02048135" 67 | integrity sha512-w96Cjofp72M5IIhpjgobBimYEfoPjx1Vx0BSX9P30WBdZW2WIKU0T1Bd0kz2eNZ9ikjKgHbEyKx8BB6H1L3h3A== 68 | dependencies: 69 | dom-serializer "^1.0.1" 70 | domelementtype "^2.2.0" 71 | domhandler "^4.2.0" 72 | 73 | entities@^2.0.0: 74 | version "2.2.0" 75 | resolved "https://registry.yarnpkg.com/entities/-/entities-2.2.0.tgz#098dc90ebb83d8dffa089d55256b351d34c4da55" 76 | integrity sha512-p92if5Nz619I0w+akJrLZH0MX0Pb5DX39XOwQTtXSdQQOaYH03S1uIQp4mhOZtAXrxq4ViO67YTiLBo2638o9A== 77 | 78 | he@1.2.0: 79 | version "1.2.0" 80 | resolved "https://registry.yarnpkg.com/he/-/he-1.2.0.tgz#84ae65fa7eafb165fddb61566ae14baf05664f0f" 81 | integrity sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw== 82 | 83 | "node-html-markdown@link:..": 84 | version "0.0.0" 85 | uid "" 86 | 87 | node-html-parser@^5.3.3: 88 | version "5.3.3" 89 | resolved 
"https://registry.yarnpkg.com/node-html-parser/-/node-html-parser-5.3.3.tgz#2845704f3a7331a610e0e551bf5fa02b266341b6" 90 | integrity sha512-ncg1033CaX9UexbyA7e1N0aAoAYRDiV8jkTvzEnfd1GDvzFdrsXLzR4p4ik8mwLgnaKP/jyUFWDy9q3jvRT2Jw== 91 | dependencies: 92 | css-select "^4.2.1" 93 | he "1.2.0" 94 | 95 | nth-check@^2.0.1: 96 | version "2.0.1" 97 | resolved "https://registry.yarnpkg.com/nth-check/-/nth-check-2.0.1.tgz#2efe162f5c3da06a28959fbd3db75dbeea9f0fc2" 98 | integrity sha512-it1vE95zF6dTT9lBsYbxvqh0Soy4SPowchj0UBGj/V6cTPnXXtQOPUbhZ6CmGzAD/rW22LQK6E96pcdJXk4A4w== 99 | dependencies: 100 | boolbase "^1.0.0" 101 | 102 | progress@^2.0.3: 103 | version "2.0.3" 104 | resolved "https://registry.yarnpkg.com/progress/-/progress-2.0.3.tgz#7e8cf8d8f5b8f239c1bc68beb4eb78567d572ef8" 105 | integrity sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA== 106 | 107 | summary@^2.1.0: 108 | version "2.1.0" 109 | resolved "https://registry.yarnpkg.com/summary/-/summary-2.1.0.tgz#be8a49a0aa34eb6ceea56042cae88f8add4b0885" 110 | integrity sha512-nMIjMrd5Z2nuB2RZCKJfFMjgS3fygbeyGk9PxPPaJR1RIcyN9yn4A63Isovzm3ZtQuEkLBVgMdPup8UeLH7aQw== 111 | 112 | turndown@^7.1.1: 113 | version "7.1.1" 114 | resolved "https://registry.yarnpkg.com/turndown/-/turndown-7.1.1.tgz#96992f2d9b40a1a03d3ea61ad31b5a5c751ef77f" 115 | integrity sha512-BEkXaWH7Wh7e9bd2QumhfAXk5g34+6QUmmWx+0q6ThaVOLuLUqsnkq35HQ5SBHSaxjSfSM7US5o4lhJNH7B9MA== 116 | dependencies: 117 | domino "^2.1.6" 118 | -------------------------------------------------------------------------------- /jest.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | testEnvironment: "node", 3 | preset: 'ts-jest', 4 | testRegex: '.*(test|spec)\\.tsx?$', 5 | moduleFileExtensions: [ 'ts', 'tsx', 'js', 'jsx', 'json', 'node' ], 6 | transform: { 7 | '^.+\\.tsx?$': [ 8 | 'ts-jest', 9 | { 10 | tsconfig: '/test/tsconfig.json' 11 | } 12 | ] 13 | }, 14 | modulePaths: [ 
"" ], 15 | testTimeout: 10000, 16 | roots: [ '' ], 17 | collectCoverageFrom: [ "src/**/*.ts" ] 18 | } 19 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "node-html-markdown", 3 | "description": "Fast HTML to markdown cross-compiler, compatible with both node and the browser", 4 | "version": "1.3.0", 5 | "main": "dist/index.js", 6 | "types": "dist/index.d.ts", 7 | "scripts": { 8 | "compile": "tsc", 9 | "build": "yarn run clean && yarn run compile", 10 | "clean": "npx -y rimraf coverage dist **/*.tsbuildinfo", 11 | "------------- ": "-------------", 12 | "benchmark": "cd benchmark && yarn run benchmark quick", 13 | "benchmark:large": "cd benchmark && yarn run benchmark", 14 | "test": "jest", 15 | "test:coverage": "jest --collect-coverage", 16 | "------------- ": "-------------", 17 | "prepare": "ts-patch patch tsc --silent && cd benchmark && yarn install" 18 | }, 19 | "files": [ 20 | "README.md", 21 | "CHANGELOG.md", 22 | "dist" 23 | ], 24 | "keywords": [ 25 | "html", 26 | "markdown", 27 | "converter", 28 | "md", 29 | "html5", 30 | "node-html-parser", 31 | "fast-html-parser", 32 | "turndown" 33 | ], 34 | "author": { 35 | "name": "Ron S.", 36 | "url": "http://twitter.com/ron" 37 | }, 38 | "repository": { 39 | "type": "git", 40 | "url": "git+ssh://git@github.com/crosstype/node-html-markdown.git" 41 | }, 42 | "bugs": { 43 | "url": "https://github.com/crosstype/node-html-markdown/issues" 44 | }, 45 | "homepage": "https://github.com/crosstype/node-html-markdown#readme", 46 | "license": "MIT", 47 | "engines": { 48 | "node": ">=10.0.0" 49 | }, 50 | "dependencies": { 51 | "node-html-parser": "^6.1.1" 52 | }, 53 | "devDependencies": { 54 | "@types/jest": "~28.1.1", 55 | "@types/node": "^18.11.5", 56 | "jest": "^29.2.2", 57 | "standard-version": "^9.5.0", 58 | "ts-jest": "^29.0.3", 59 | "ts-node": "^10.9.1", 60 | "ts-patch": 
"^2.0.2", 61 | "typescript": "^4.8.4", 62 | "rimraf": "^3.0.2" 63 | }, 64 | "standard-version": { 65 | "types": [ 66 | { 67 | "type": "feat", 68 | "section": "Features" 69 | }, 70 | { 71 | "type": "fix", 72 | "section": "Fixes" 73 | }, 74 | { 75 | "type": "chore", 76 | "hidden": true 77 | }, 78 | { 79 | "type": "docs", 80 | "hidden": true 81 | }, 82 | { 83 | "type": "style", 84 | "hidden": true 85 | }, 86 | { 87 | "type": "refactor", 88 | "hidden": true 89 | }, 90 | { 91 | "type": "perf", 92 | "hidden": true 93 | }, 94 | { 95 | "type": "test", 96 | "hidden": true 97 | } 98 | ] 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /src/config.ts: -------------------------------------------------------------------------------- 1 | import { isWhiteSpaceOnly, splitSpecial, surround, tagSurround, trimNewLines } from './utilities'; 2 | import { PostProcessResult, TranslatorConfigObject } from './translator'; 3 | import { NodeHtmlMarkdownOptions } from './options'; 4 | import { Options as NodeHtmlParserOptions } from 'node-html-parser' 5 | 6 | 7 | /* ****************************************************************************************************************** */ 8 | // region: Elements 9 | /* ****************************************************************************************************************** */ 10 | 11 | export const defaultBlockElements = [ 12 | 'ADDRESS', 'ARTICLE', 'ASIDE', 'AUDIO', 'BLOCKQUOTE', 'BODY', 'CANVAS', 'CENTER', 'DD', 'DIR', 'DIV', 'DL', 13 | 'DT', 'FIELDSET', 'FIGCAPTION', 'FIGURE', 'FOOTER', 'FORM', 'FRAMESET', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6', 14 | 'HEADER', 'HGROUP', 'HR', 'HTML', 'ISINDEX', 'LI', 'MAIN', 'MENU', 'NAV', 'NOFRAMES', 'NOSCRIPT', 'OL', 15 | 'OUTPUT', 'P', 'PRE', 'SECTION', 'TABLE', 'TBODY', 'TD', 'TFOOT', 'TH', 'THEAD', 'TR', 'UL' 16 | ] 17 | 18 | export const defaultIgnoreElements = [ 19 | 'AREA', 'BASE', 'COL', 'COMMAND', 'EMBED', 'HEAD', 'INPUT', 'KEYGEN', 'LINK', 
'META', 'PARAM', 'SCRIPT', 20 | 'SOURCE', 'STYLE', 'TRACK', 'WBR' 21 | ]; 22 | 23 | export const contentlessElements = [ 'BR', 'HR', 'IMG' ]; 24 | 25 | // endregion 26 | 27 | 28 | /* ****************************************************************************************************************** */ 29 | // region: Options 30 | /* ****************************************************************************************************************** */ 31 | 32 | // noinspection RegExpUnnecessaryNonCapturingGroup 33 | export const defaultOptions: Readonly = Object.freeze({ 34 | preferNativeParser: false, 35 | codeFence: '```', 36 | bulletMarker: '*', 37 | indent: ' ', 38 | codeBlockStyle: <'indented' | 'fenced'>'fenced', 39 | emDelimiter: '_', 40 | strongDelimiter: '**', 41 | strikeDelimiter: '~~', 42 | maxConsecutiveNewlines: 3, 43 | /** 44 | * Character: Affects: Example: 45 | * 46 | * \ Escaping \- 47 | * ` Code `` code ``, ```lang\n code block \n``` 48 | * * Bullet & Separators * item, *** 49 | * _ Bold, Italics, Separator _italic_, __bold__, ^___ 50 | * ~ Strikethrough, Code ~~strike~~, ~~~lang\n code block \n~~~ 51 | * [ Url [caption](url) 52 | * ] Url [caption](url) 53 | */ 54 | globalEscape: [ /[\\`*_~\[\]]/gm, '\\$&' ] as const, 55 | /** 56 | * Note: The following compiled pattern was selected after perf testing various alternatives. 57 | * Please be mindful of performance if updating/changing it. 58 | * 59 | * Sequence: Affects: Example: 60 | * 61 | * +(space) Bullets + item 62 | * = Heading heading\n==== 63 | * #{1,6}(space) Heading ## Heading 64 | * > Blockquote > quote 65 | * - Bullet, Header, Separator - item, heading\n---, --- 66 | * \d+\.(space) Numbered list item 1. 
Item 67 | */ 68 | lineStartEscape: [ 69 | /^(\s*?)((?:\+\s)|(?:[=>-])|(?:#{1,6}\s))|(?:(\d+)(\.\s))/gm, 70 | '$1$3\\$2$4' 71 | ] as const, 72 | 73 | useInlineLinks: true 74 | }); 75 | 76 | // endregion 77 | 78 | 79 | /* ****************************************************************************************************************** */ 80 | // region: Translators 81 | /* ****************************************************************************************************************** */ 82 | 83 | export const defaultTranslators: TranslatorConfigObject = { 84 | /* Pre-formatted text */ 85 | 'pre': { noEscape: true, preserveWhitespace: true }, 86 | 87 | /* Line break */ 88 | 'br': { content: ` \n`, recurse: false }, 89 | 90 | /* Horizontal Rule*/ 91 | 'hr': { content: '---', recurse: false }, 92 | 93 | /* Headings */ 94 | 'h1,h2,h3,h4,h5,h6': ({ node }) => ({ 95 | prefix: '#'.repeat(+node.tagName.charAt(1)) + ' ' 96 | }), 97 | 98 | /* Bold / Strong */ 99 | 'strong,b': { 100 | spaceIfRepeatingChar: true, 101 | postprocess: ({ content, options: { strongDelimiter } }) => 102 | isWhiteSpaceOnly(content) 103 | ? PostProcessResult.RemoveNode 104 | : tagSurround(content, strongDelimiter) 105 | }, 106 | 107 | /* Strikethrough */ 108 | 'del,s,strike': { 109 | spaceIfRepeatingChar: true, 110 | postprocess: ({ content, options: { strikeDelimiter } }) => 111 | isWhiteSpaceOnly(content) 112 | ? PostProcessResult.RemoveNode 113 | : tagSurround(content, strikeDelimiter) 114 | }, 115 | 116 | /* Italic / Emphasis */ 117 | 'em,i': { 118 | spaceIfRepeatingChar: true, 119 | postprocess: ({ content, options: { emDelimiter } }) => 120 | isWhiteSpaceOnly(content) 121 | ? PostProcessResult.RemoveNode 122 | : tagSurround(content, emDelimiter) 123 | }, 124 | 125 | /* Lists (ordered & unordered) */ 126 | 'ol,ul': ({ listKind }) => ({ 127 | surroundingNewlines: listKind ? 
1 : 2, 128 | }), 129 | 130 | /* List Item */ 131 | 'li': ({ options: { bulletMarker }, indentLevel, listKind, listItemNumber }) => { 132 | const indentationLevel = +(indentLevel || 0); 133 | return { 134 | prefix: ' '.repeat(+(indentLevel || 0)) + 135 | (((listKind === 'OL') && (listItemNumber !== undefined)) ? `${listItemNumber}. ` : `${bulletMarker} `), 136 | surroundingNewlines: 1, 137 | postprocess: ({ content }) => 138 | isWhiteSpaceOnly(content) 139 | ? PostProcessResult.RemoveNode 140 | : content 141 | .trim() 142 | .replace(/([^\r\n])(?:\r?\n)+/g, `$1 \n${' '.repeat(indentationLevel)}`) 143 | .replace(/(\S+?)[^\S\r\n]+$/gm, '$1 ') 144 | } 145 | }, 146 | 147 | /* Block Quote */ 148 | 'blockquote': { 149 | postprocess: ({ content }) => trimNewLines(content).replace(/^(>*)[^\S\r\n]?/gm, `>$1 `) 150 | }, 151 | 152 | /* Code (block / inline) */ 153 | 'code': ({ node, parent, options: { codeFence, codeBlockStyle }, visitor }) => { 154 | const isCodeBlock = [ 'PRE', 'WRAPPED-PRE' ].includes(parent?.tagName!) && parent!.childNodes.length < 2; 155 | 156 | /* Handle code (non-block) */ 157 | if (!isCodeBlock) 158 | return { 159 | spaceIfRepeatingChar: true, 160 | noEscape: true, 161 | postprocess: ({ content }) => { 162 | // Find longest occurring sequence of running backticks and add one more (so content is escaped) 163 | const delimiter = '`' + (content.match(/`+/g)?.sort((a, b) => b.length - a.length)?.[0] || ''); 164 | const padding = delimiter.length > 1 ? 
' ' : ''; 165 | 166 | return surround(surround(content, padding), delimiter) 167 | } 168 | } 169 | 170 | /* Handle code block */ 171 | if (codeBlockStyle === 'fenced') { 172 | const language = node.getAttribute('class')?.match(/language-(\S+)/)?.[1] || ''; 173 | return { 174 | noEscape: true, 175 | prefix: codeFence + language + '\n', 176 | postfix: '\n' + codeFence, 177 | childTranslators: visitor.instance.codeBlockTranslators 178 | } 179 | } else { 180 | return { 181 | noEscape: true, 182 | postprocess: ({ content }) => content.replace(/^/gm, ' '), 183 | childTranslators: visitor.instance.codeBlockTranslators 184 | } 185 | } 186 | }, 187 | 188 | /* Table */ 189 | 'table': ({ visitor }) => ({ 190 | surroundingNewlines: 2, 191 | childTranslators: visitor.instance.tableTranslators, 192 | postprocess: ({ content, nodeMetadata, node }) => { 193 | // Split and trim leading + trailing pipes 194 | const rawRows = splitSpecial(content).map(({ text }) => text.replace(/^(?:\|\s+)?(.+)\s*\|\s*$/, '$1')); 195 | 196 | /* Get Row Data */ 197 | const rows: string[][] = []; 198 | let colWidth: number[] = []; 199 | for (const row of rawRows) { 200 | if (!row) continue; 201 | 202 | /* Track columns */ 203 | const cols = row.split(' |').map((c, i) => { 204 | c = c.trim(); 205 | if (colWidth.length < i + 1 || colWidth[i] < c.length) colWidth[i] = c.length; 206 | 207 | return c; 208 | }); 209 | 210 | rows.push(cols); 211 | } 212 | 213 | if (rows.length < 1) return PostProcessResult.RemoveNode; 214 | 215 | /* Compose Table */ 216 | const maxCols = colWidth.length; 217 | 218 | let res = ''; 219 | const caption = nodeMetadata.get(node)!.tableMeta!.caption; 220 | if (caption) res += caption + '\n'; 221 | 222 | rows.forEach((cols, rowNumber) => { 223 | res += '| '; 224 | 225 | /* Add Columns */ 226 | for (let i = 0; i < maxCols; i++) { 227 | let c = (cols[i] ?? 
''); 228 | c += ' '.repeat(Math.max(0, (colWidth[i] - c.length))); // Pad to max length 229 | 230 | res += c + ' |' + (i < maxCols - 1 ? ' ' : ''); 231 | } 232 | 233 | res += '\n'; 234 | 235 | // Add separator row 236 | if (rowNumber === 0) res += '|' + colWidth.map(w => ' ' + '-'.repeat(w) + ' |').join('') + '\n' 237 | }); 238 | 239 | return res; 240 | } 241 | }), 242 | 243 | /* Link */ 244 | 'a': ({ node, options, visitor }) => { 245 | const href = node.getAttribute('href'); 246 | if (!href) return {}; 247 | 248 | // Encodes symbols that can cause problems in markdown 249 | let encodedHref = ''; 250 | for (const chr of href) { 251 | switch (chr) { 252 | case '(': 253 | encodedHref += '%28'; 254 | break; 255 | case ')': 256 | encodedHref += '%29'; 257 | break; 258 | case '_': 259 | encodedHref += '%5F'; 260 | break; 261 | case '*': 262 | encodedHref += '%2A'; 263 | break; 264 | default: 265 | encodedHref += chr; 266 | } 267 | } 268 | 269 | const title = node.getAttribute('title'); 270 | 271 | // Inline link, when possible 272 | // See: https://github.com/crosstype/node-html-markdown/issues/17 273 | if (node.textContent === href && options.useInlineLinks) return { content: `<${encodedHref}>` }; 274 | 275 | return { 276 | postprocess: ({ content }) => content.replace(/(?:\r?\n)+/g, ' '), 277 | childTranslators: visitor.instance.aTagTranslators, 278 | prefix: '[', 279 | postfix: ']' + (!options.useLinkReferenceDefinitions 280 | ? `(${encodedHref}${title ? 
/**
 * Translators used for the direct contents of a <table> (caption, rows and cells).
 */
export const tableTranslatorConfig: TranslatorConfigObject = {
  /* Table Caption */
  'caption': ({ visitor }) => ({
    surroundingNewlines: false,
    childTranslators: visitor.instance.tableCellTranslators,
    postprocess: ({ content, nodeMetadata, node }) => {
      // Collapse the caption onto one line and store it in the table metadata; the caption node itself
      // produces no output here
      const caption = content.replace(/(?:\r?\n)+/g, ' ').trim();
      if (caption) nodeMetadata.get(node)!.tableMeta!.caption = '__' + caption + '__'

      return PostProcessResult.RemoveNode;
    },
  }),

  /* Table row */
  'tr': ({ visitor }) => ({
    surroundingNewlines: false,
    childTranslators: visitor.instance.tableRowTranslators,
    postfix: '\n',
    prefix: '| ',
    // A row that does not end with a cell terminator (' |') produced no cells, so it is dropped
    postprocess: ({ content }) => !/ \|\s*$/.test(content) ? PostProcessResult.RemoveNode : content
  }),

  /* Table cell, (header cell) */
  'th,td': ({ visitor }) => ({
    surroundingNewlines: false,
    childTranslators: visitor.instance.tableCellTranslators,
    prefix: ' ',
    postfix: ' |',
    postprocess: ({ content }) =>
      trimNewLines(content)
        // Escape EVERY pipe in the cell. A string pattern would only replace the first occurrence, leaving
        // later pipes to be read as column separators
        .replace(/\|/g, '\\|')
        // A cell must stay on a single line
        .replace(/(?:\r?\n)+/g, ' ')
        .trim()
  }),
}
/** 376 | * Note: Do not change - values are tuned for performance 377 | */ 378 | export const nodeHtmlParserConfig: NodeHtmlParserOptions = { 379 | lowerCaseTagName: false, 380 | comment: false, 381 | fixNestedATags: true, 382 | blockTextElements: { 383 | script: false, 384 | noscript: false, 385 | style: false 386 | } 387 | }; 388 | 389 | // endregion 390 | -------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- 1 | export { NodeMetadata, NodeMetadataMap } from './visitor' 2 | export { NodeHtmlMarkdown, FileCollection } from './main' 3 | export { NodeHtmlMarkdownOptions } from './options' 4 | export { 5 | TranslatorConfig, TranslatorConfigFactory, TranslatorCollection, PostProcessResult, TranslatorConfigObject 6 | } from './translator' 7 | -------------------------------------------------------------------------------- /src/main.ts: -------------------------------------------------------------------------------- 1 | import { NodeHtmlMarkdownOptions } from './options'; 2 | import { TranslatorCollection, TranslatorConfigObject } from './translator'; 3 | import { 4 | aTagTranslatorConfig, defaultBlockElements, defaultCodeBlockTranslators, defaultIgnoreElements, defaultOptions, 5 | defaultTranslators, tableCellTranslatorConfig, tableRowTranslatorConfig, tableTranslatorConfig 6 | } from './config'; 7 | import { parseHTML } from './utilities'; 8 | import { getMarkdownForHtmlNodes } from './visitor'; 9 | 10 | 11 | /* ****************************************************************************************************************** */ 12 | // region: Types 13 | /* ****************************************************************************************************************** */ 14 | 15 | export type FileCollection = { [fileName: string]: string } 16 | type Options = Partial 17 | 18 | // endregion 19 | 20 | 21 | /* 
/**
 * HTML to markdown translator.
 *
 * An instance holds the merged options and one translator collection per translation context (default, inside
 * links, inside code blocks, and inside tables / table rows / table cells). Use the static `translate` for a
 * one-off conversion, or create an instance and call `translate` repeatedly to reuse the same setup.
 */
export class NodeHtmlMarkdown {
  public translators = new TranslatorCollection();
  public aTagTranslators = new TranslatorCollection();
  public codeBlockTranslators = new TranslatorCollection();
  public tableTranslators = new TranslatorCollection();
  public tableRowTranslators = new TranslatorCollection();
  public tableCellTranslators = new TranslatorCollection();
  public readonly options: NodeHtmlMarkdownOptions

  /**
   * @param options - Overrides applied on top of `defaultOptions`. The supplied object (and its arrays) is not modified.
   * @param customTranslators - Entries merged over `defaultTranslators`
   * @param customCodeBlockTranslators - Entries merged over `defaultCodeBlockTranslators`
   */
  constructor(options?: Options, customTranslators?: TranslatorConfigObject, customCodeBlockTranslators?: TranslatorConfigObject) {
    /* Setup Options */
    this.options = { ...defaultOptions, ...options };
    // User-supplied element lists extend the defaults (concat returns a new array, so the input is left untouched)
    const ignoredElements = this.options.ignore?.concat(defaultIgnoreElements) ?? defaultIgnoreElements;
    const blockElements = this.options.blockElements?.concat(defaultBlockElements) ?? defaultBlockElements;

    /* Setup Translator Bases */
    ignoredElements?.forEach(el => {
      this.translators.set(el, { ignore: true, recurse: false });
      this.codeBlockTranslators.set(el, { ignore: true, recurse: false });
    })

    blockElements?.forEach(el => {
      this.translators.set(el, { surroundingNewlines: 2 });
      this.codeBlockTranslators.set(el, { surroundingNewlines: 2 });
    });

    /* Add and merge bases with default and custom translator configs */
    for (const [ elems, cfg ] of Object.entries({ ...defaultTranslators, ...customTranslators }))
      this.translators.set(elems, cfg, true);

    for (const [ elems, cfg ] of Object.entries({ ...defaultCodeBlockTranslators, ...customCodeBlockTranslators }))
      this.codeBlockTranslators.set(elems, cfg, true);

    for (const [ elems, cfg ] of Object.entries(aTagTranslatorConfig))
      this.aTagTranslators.set(elems, cfg, true);

    for (const [ elems, cfg ] of Object.entries(tableTranslatorConfig))
      this.tableTranslators.set(elems, cfg, true);

    for (const [ elems, cfg ] of Object.entries(tableRowTranslatorConfig))
      this.tableRowTranslators.set(elems, cfg, true);

    for (const [ elems, cfg ] of Object.entries(tableCellTranslatorConfig))
      this.tableCellTranslators.set(elems, cfg, true);

    // TODO - Workaround for upstream issue (may not be fixed) - https://github.com/taoqf/node-html-parser/issues/78
    // The spread above is shallow, so `this.options.textReplace` may be the caller's own array. Build a new
    // array instead of pushing, otherwise every construction with the same options object grows the caller's list.
    // NOTE(review): as listed here the pattern only matches the empty string at line starts, which makes the
    // replacement a no-op - it looks truncated; confirm the intended pattern against the linked issue.
    this.options.textReplace = [ ...(this.options.textReplace ?? []), [ /^/gmi, '' ] ];
  }

  /* ********************************************************* */
  // region: Static Methods
  /* ********************************************************* */

  /**
   * Translate HTML source text to markdown
   */
  static translate(html: string, options?: Options, customTranslators?: TranslatorConfigObject, customCodeBlockTranslators?: TranslatorConfigObject): string
  /**
   * Translate collection of HTML source text to markdown
   */
  static translate(files: FileCollection, options?: Options, customTranslators?: TranslatorConfigObject, customCodeBlockTranslators?: TranslatorConfigObject): FileCollection
  static translate(htmlOrFiles: string | FileCollection, opt?: Options, customTranslators?: TranslatorConfigObject, customCodeBlockTranslators?: TranslatorConfigObject):
    string | FileCollection
  {
    // A fresh instance is created for every static call
    return NodeHtmlMarkdown.prototype.translateWorker.call(new NodeHtmlMarkdown(opt, customTranslators, customCodeBlockTranslators), htmlOrFiles);
  }

  // endregion

  /* ********************************************************* */
  // region: Methods
  /* ********************************************************* */

  /**
   * Translate HTML source text to markdown
   */
  translate(html: string): string
  /**
   * Translate collection of HTML source text to markdown
   */
  translate(files: FileCollection): FileCollection
  translate(htmlOrFiles: string | FileCollection): string | FileCollection {
    return this.translateWorker(htmlOrFiles);
  }

  // endregion

  /* ********************************************************* */
  // region: Internal Methods
  /* ********************************************************* */

  /**
   * Shared implementation behind both `translate` forms.
   *
   * A single string is wrapped in a one-entry collection under the key 'default' and unwrapped again on return.
   * The file name is only passed on to `getMarkdownForHtmlNodes` for keys other than 'default'.
   */
  private translateWorker(htmlOrFiles: string | FileCollection) {
    const inputIsCollection = typeof htmlOrFiles !== 'string';
    const inputFiles: FileCollection = !inputIsCollection ? { 'default': htmlOrFiles } : htmlOrFiles;
    const outputFiles: FileCollection = {};

    for (const [ fileName, html ] of Object.entries(inputFiles)) {
      const parsedHtml = parseHTML(html, this.options);
      outputFiles[fileName] = getMarkdownForHtmlNodes(this, parsedHtml, fileName !== 'default' ? fileName : void 0);
    }

    return inputIsCollection ? outputFiles : outputFiles['default'];
  }

  // endregion

}

// endregion
--------------------------------------------------------------------------------
/* ****************************************************************************************************************** */
// region: Types
/* ****************************************************************************************************************** */

export interface NodeHtmlMarkdownOptions {
  /**
   * Use native window DOMParser when available
   * @default false
   */
  preferNativeParser: boolean,

  /**
   * Code block fence
   * @default ```
   */
  codeFence: string,

  /**
   * Bullet marker
   * @default *
   */
  bulletMarker: string,

  /**
   * Style for code block
   * @default fenced
   */
  codeBlockStyle: 'indented' | 'fenced',

  /**
   * Emphasis delimiter
   * @default _
   */
  emDelimiter: string,

  /**
   * Strong delimiter
   * @default **
   */
  strongDelimiter: string,

  /**
   * Strikethrough delimiter
   * @default ~~
   */
  strikeDelimiter: string,

  /**
   * Supplied elements will be ignored (inner text is ignored and children are not parsed)
   */
  readonly ignore?: string[],

  /**
   * Supplied elements will be treated as blocks (surrounded with blank lines)
   */
  readonly blockElements?: string[],

  /**
   * Max consecutive new lines allowed
   * @default 3
   */
  maxConsecutiveNewlines: number,

  /**
   * Line Start Escape pattern
   * (Note: Setting this will override the default escape settings, you might want to use textReplace option instead)
   */
  lineStartEscape: readonly [ pattern: RegExp, replacement: string ]

  /**
   * Global escape pattern
   * (Note: Setting this will override the default escape settings, you might want to use textReplace option instead)
   */
  globalEscape: readonly [ pattern: RegExp, replacement: string ]

  /**
   * User-defined text replacement pattern (Replaces matching text retrieved from nodes)
   */
  textReplace?: (readonly [ pattern: RegExp, replacement: string ])[]

  /**
   * Keep images with data: URI (Note: These can be up to 1MB each)
   * @example
   * <img src="data:image/gif;base64,...">
   * @default false
   */
  keepDataImages?: boolean

  /**
   * Place URLS at the bottom and format links using link reference definitions
   *
   * @example
   * Click <a href="/url">here</a>. Or <a href="/url2">here</a>. Or <a href="/url">this link</a>.
   *
   * Becomes:
   * Click [here][1]. Or [here][2]. Or [this link][1].
   *
   * [1]: /url
   * [2]: /url2
   */
  useLinkReferenceDefinitions?: boolean

  /**
   * Wrap URL text in < > instead of []() syntax.
   *
   * @example
   * The input <a href="https://google.com">https://google.com</a>
   * becomes <https://google.com>
   * instead of [https://google.com](https://google.com)
   *
   * @default true
   */
  useInlineLinks?: boolean
}

// endregion

--------------------------------------------------------------------------------
/src/translator.ts:
--------------------------------------------------------------------------------
import { NodeHtmlMarkdownOptions } from './options';
import { NodeMetadata, NodeMetadataMap, Visitor } from './visitor';
import { ElementNode } from './nodes';


/* ****************************************************************************************************************** */
// region: Types
/* ****************************************************************************************************************** */

export type TranslatorConfigFactory = {
  (ctx: TranslatorContext): TranslatorConfig
  base?: TranslatorConfig
}

export type TranslatorConfigObject = { [tags: string]: TranslatorConfig | TranslatorConfigFactory }

/**
 * Context handed to translator factories and `postprocess` hooks.
 * The node's metadata fields (if any) are spread into the context by `createTranslatorContext`.
 */
export type TranslatorContext = Partial<NodeMetadata> & {
  node: ElementNode
  options: NodeHtmlMarkdownOptions
  parent?: ElementNode
  nodeMetadata: NodeMetadataMap
  visitor: Visitor
  base?: TranslatorConfig
}

export interface TranslatorConfig {
  /**
   * Precedes content, follows surroundingNewLines
   */
  prefix?: string

  /**
   * Follows content, precedes surroundingNewLines
   */
  postfix?: string

  /**
   * Set fixed output content
   */
  content?: string

  /**
   * Post-process content after inner nodes have been rendered.
   * Returning undefined will cause the content to not be updated
   */
  postprocess?: (ctx: TranslatorContext & { content: string }) => string | PostProcessResult

  /**
   * If false, no child elements will be scanned
   * @default true
   */
  recurse?: boolean

  /**
   * Adds newline before and after (true, false, or number of newlines to add per side)
   * @default false
   */
  surroundingNewlines?: boolean | number

  /**
   * Ignore node entirely
   */
  ignore?: boolean

  /**
   * Do not escape content
   */
  noEscape?: boolean

  /**
   * If first character matches end of the last written data, add a space
   * @example
   * // old text: **abc**
   * // new text: **def**
   * // becomes: **abc** **def**
   */
  spaceIfRepeatingChar?: boolean

  /**
   * Ensure translator is always visited, even if element is empty
   * Note: For speed, trees are optimized beforehand to only visit elements which have child nodes or text content.
   * In some cases, however, you may want to create or alter a translator to be triggered even if the element is empty.
   * (If using a TranslatorConfigFactory, this value is always treated as true)
   */
  preserveIfEmpty?: boolean

  /**
   * Keep whitespace as it is
   */
  preserveWhitespace?: boolean

  /**
   * Custom translator collection to use for child HTML nodes
   */
  childTranslators?: TranslatorCollection
}

export enum PostProcessResult {
  NoChange,
  RemoveNode
}

// endregion


/* ****************************************************************************************************************** */
// region: TranslatorCollection
/* ****************************************************************************************************************** */

/**
 * Map-like store of translator configs, keyed by upper-cased tag name directly on the instance
 */
export class TranslatorCollection {
  /**
   * @internal
   */
  [tagName: string]: any

  get size() { return Object.keys(this).length }

  /**
   * Add / update translator config for one or more element tags
   * @param keys - Comma-separated tag names (stored upper-cased)
   * @param config - Config object or factory to register for each tag
   * @param preserveBase - When set and an object config is already registered for the tag, an object `config` is
   *   merged over it and a factory `config` is wrapped with the existing config exposed as `base`
   */
  set(keys: string, config: TranslatorConfig | TranslatorConfigFactory, /* @internal */ preserveBase?: boolean) {
    keys.split(',').forEach(el => {
      el = el.toUpperCase();

      let res = config;
      if (preserveBase) {
        const base = this[el];
        if (isTranslatorConfig(base))
          res = !isTranslatorConfig(config)
                ? Object.assign((...args: any[]) => (<any>config).apply(void 0, args), { base })
                : { ...base, ...config };
      }

      this[el] = res;
    });
  }

  /**
   * Get translator config for element tag
   */
  get(key: string): TranslatorConfig | TranslatorConfigFactory {
    return this[key.toUpperCase()] as any;
  }

  /**
   * Returns array of entries
   */
  entries(): [ elementName: string, config: TranslatorConfig | TranslatorConfigFactory ][] {
    return Object.entries(this);
  }

  /**
   * Remove translator config for one or more element tags
   */
  remove(keys: string): void {
    keys.split(',').forEach(el => delete this[el.toUpperCase()]);
  }
}

// endregion


/* ****************************************************************************************************************** */
// region: Utilities
/* ****************************************************************************************************************** */

/**
 * Only use to narrow union of types where only TranslatorConfig has JS type 'object'
 */
export const isTranslatorConfig = (v: any): v is TranslatorConfig => typeof v === 'object';

/**
 * Build the context object passed to translator factories / postprocess hooks.
 * Metadata fields are spread last, so they appear as top-level context properties.
 */
export function createTranslatorContext(
  visitor: Visitor,
  node: ElementNode,
  metadata?: NodeMetadata,
  base?: TranslatorConfig
): TranslatorContext
{
  const { instance, nodeMetadata, } = visitor;
  return {
    node,
    options: instance.options,
    parent: <ElementNode>node.parentNode,
    nodeMetadata,
    visitor,
    base,
    ...metadata
  };
}

// endregion

--------------------------------------------------------------------------------
/src/utilities.ts:
--------------------------------------------------------------------------------
import { NodeHtmlMarkdownOptions } from './options';
import { ElementNode, HtmlNode } from './nodes';
import { nodeHtmlParserConfig } from './config';


/* ****************************************************************************************************************** */
// region: String Utils
/* ****************************************************************************************************************** */

export const trimNewLines = (s: string) => s.replace(/^\n+|\n+$/g, '');
export const surround = (source: string, surroundStr: string) => `${surroundStr}${source}${surroundStr}`;
export const isWhiteSpaceOnly = (s: string) => !/\S/.test(s);

/**
 * Split string, preserving specific newline used for each line
 */
export function splitSpecial(s: string) {
  const lines: { text: string, newLineChar: '\r' | '\n' | '\r\n' | '' }[] = [];
  const strLen = s.length;

  for (let i = 0, startPos = 0; i < strLen; ++i) {
    let char = s.charAt(i);
    let newLineChar: typeof lines[number]['newLineChar'] = '';

    if (char === '\r') newLineChar = (s.charAt(i + 1) === '\n') ? '\r\n' : char;
    else if (char === '\n') newLineChar = char;

    // A line ends at a newline sequence or at the final character of the string
    const endPos = newLineChar ? i :
                   i === (strLen - 1) ? i + 1 :
                   undefined;

    if (endPos === undefined) continue;

    lines.push({
      text: s.slice(startPos, endPos),
      newLineChar
    });

    startPos = endPos + newLineChar.length;
    if (newLineChar.length > 1) ++i; // skip the '\n' of a '\r\n' pair
  }

  return lines;
}

/**
 * Surround tag content with delimiter (moving any leading/trailing space to outside the tag)
 */
export function tagSurround(content: string, surroundStr: string) {
  // If un-escaped surroundStr already occurs, remove all instances
  // See: https://github.com/crosstype/node-html-markdown/issues/18
  const nestedSurroundStrIndex = content.indexOf(surroundStr);
  if (nestedSurroundStrIndex >= 0)
    content = content.replace(
      new RegExp(`([^\\\\])\\${surroundStr.split('').join('\\')}`, 'gm'),
      '$1'
    );

  const lines = splitSpecial(content);
  let res = '';

  for (const { text, newLineChar } of lines) {
    let i: number = 0;
    let startPos: number | undefined = undefined;
    let endPos: number | undefined = undefined;

    // Scan forward for the first non-whitespace char, then backward from the end for the last one
    while (i >= 0 && i < text.length) {
      if (/[\S]/.test(text[i])) {
        if (startPos === undefined) {
          startPos = i;
          i = text.length;
        } else {
          endPos = i;
          i = NaN; // terminates the loop
        }
      }

      if (startPos === undefined) ++i;
      else --i;
    }

    // If whole string is non-breaking whitespace, don't surround it
    if (startPos === undefined) {
      res += text + newLineChar;
      continue;
    }

    if (endPos === undefined) endPos = text.length - 1;

    // Only the single whitespace character adjacent to the content is carried outside the delimiters
    const leadingSpace = startPos > 0 ? text[startPos - 1] : '';
    const trailingSpace = endPos < (text.length - 1) ? text[endPos + 1] : '';

    const slicedText = text.slice(startPos, endPos + 1)

    res += leadingSpace + surroundStr + slicedText + surroundStr + trailingSpace + newLineChar;
  }

  return res;
}

/**
 * Count trailing whitespace characters and trailing newline characters (inspects at most the last 10 characters)
 */
export const getTrailingWhitespaceInfo = (s: string): { whitespace: number, newLines: number } => {
  const res = { whitespace: 0, newLines: 0 };
  const minI = Math.max(s.length - 10, 0);
  for (let i = s.length - 1; i >= minI; --i) {
    const token = s.slice(i, i + 1);
    if (!/\s/.test(token)) break;
    ++res.whitespace;
    if ([ '\r', '\n' ].includes(token)) ++res.newLines;
  }
  return res;
}

/**
 * If value is truthy, returns `value` (or `v` if no `value` provided), otherwise, returns an empty string
 * @param v - Var to check for truthiness
 * @param value - Value to return if true
 */
export const truthyStr = (v: any, value?: string): string => v ? ((value !== undefined) ? value : String(v)) : '';

// endregion


/* ****************************************************************************************************************** */
// region: Parser
/* ****************************************************************************************************************** */

/**
 * Parse with the environment's native DOM.
 * @returns The root element of the parsed document, or undefined when no usable native DOMParser is found
 * @throws Re-throws the document-creation error when no ActiveX fallback is available
 */
function tryParseWithNativeDom(html: string): ElementNode | undefined {
  try {
    if (!(window?.DOMParser && (new window.DOMParser()).parseFromString('', 'text/html'))) return void 0;
  }
  catch {
    return void 0;
  }

  /* Get a document */
  let doc: Document;
  try {
    doc = document.implementation.createHTMLDocument('').open()
  }
  catch (e) {
    const { ActiveXObject } = (<any>window);
    if (!ActiveXObject) throw e;

    // Fall through to the shared write / close below, so this path also returns the parsed root element
    // (previously it returned the freshly opened, still empty document)
    doc = new ActiveXObject('htmlfile');
    doc.designMode = 'on'; // disable on-page scripts
    doc.open();
  }

  // Prepare document, ensuring we have a wrapper node
  // NOTE(review): wrapper tag name reconstructed (markup was stripped from this copy) - confirm against repository
  doc.write('<node-html-markdown>' + html + '</node-html-markdown>');
  doc.close();

  return doc.documentElement;
}

/**
 * Lazily load node-html-parser's `parse`; returns undefined if the module cannot be required
 */
const getNodeHtmlParser = () => {
  try {
    return require('node-html-parser').parse as typeof import('node-html-parser').parse
  }
  catch {
    return undefined;
  }
}

/**
 * Parse string to HTMLElement
 * @throws Error when no parser is available; the native parser's error when it fails and there is no fallback
 */
export function parseHTML(html: string, options: NodeHtmlMarkdownOptions): ElementNode {
  let nodeHtmlParse: ReturnType<typeof getNodeHtmlParser>;

  /* If specified, try to parse with native engine, fallback to node-html-parser */
  perfStart('parse');
  let el: ElementNode | undefined;
  if (options.preferNativeParser) {
    try {
      el = tryParseWithNativeDom(html);
    }
    catch (e) {
      nodeHtmlParse = getNodeHtmlParser();
      if (nodeHtmlParse) console.warn('Native DOM parser encountered an error during parse', e);
      else throw e;
    }
  }

  if (!el) {
    // Also covers the native parser being unavailable (returned undefined without throwing)
    nodeHtmlParse ??= getNodeHtmlParser();
    if (!nodeHtmlParse) throw new Error('No HTML parser available: node-html-parser could not be loaded');
    el = nodeHtmlParse(html, nodeHtmlParserConfig);
  }
  perfStop('parse');

  return el;
}

// endregion


/* ****************************************************************************************************************** */
// region: General
/* ****************************************************************************************************************** */

/**
 * Get a node's children as an array.
 * Arrays are returned as-is, other iterable collections (such as a DOM NodeList) are copied into a new array, and a
 * missing `childNodes` yields an empty array.
 */
export function getChildNodes<T extends HtmlNode | Node>(node: T): T[]
export function getChildNodes(node: HtmlNode | Node): (Node | HtmlNode)[] {
  const childNodes: any = node.childNodes;
  if (childNodes == null) return [];
  if (Array.isArray(childNodes)) return childNodes;

  return Array.from(<NodeListOf<ChildNode>>childNodes);
}

/**
 * Start a console timer for `label` when the LOG_PERF environment variable is set
 */
export function perfStart(label: string) {
  // `process` is not defined in every runtime (e.g. un-polyfilled browsers)
  if (typeof process !== 'undefined' && process.env?.LOG_PERF) console.time(label);
}

/**
 * Stop the console timer for `label` when the LOG_PERF environment variable is set
 */
export function perfStop(label: string) {
  if (typeof process !== 'undefined' && process.env?.LOG_PERF) console.timeEnd(label);
}

// endregion

--------------------------------------------------------------------------------
/src/visitor.ts:
--------------------------------------------------------------------------------
import { NodeHtmlMarkdown } from './main';
import { ElementNode, HtmlNode, isElementNode, isTextNode } from './nodes';
import { getChildNodes, getTrailingWhitespaceInfo, perfStart, perfStop, trimNewLines } from './utilities';
import {
  createTranslatorContext, isTranslatorConfig, PostProcessResult, TranslatorConfig, TranslatorConfigFactory,
  TranslatorConfigObject, TranslatorContext
} from './translator';
import { NodeHtmlMarkdownOptions } from './options';
import { contentlessElements } from './config';


/* ****************************************************************************************************************** */
// region: Types
/* ****************************************************************************************************************** */

/**
 * Per-element state passed down the tree while visiting
 */
export interface NodeMetadata {
  indentLevel?: number
  listKind?: 'OL' | 'UL'
  listItemNumber?: number
  noEscape?: boolean
  preserveWhitespace?: boolean
  translators?: TranslatorConfigObject
  tableMeta?: {
    node: ElementNode,
    caption?: string
  }
}

export type NodeMetadataMap = Map<ElementNode, NodeMetadata>

type VisitorResult = {
  text: string
  trailingNewlineStats: {
    whitespace: number
    newLines: number
  }
}

// endregion


/* ****************************************************************************************************************** */
// region: Visitor
/* 
****************************************************************************************************************** */

/**
 * Properties & methods marked public are designated as such due to the fact that we may add middleware / transformer
 * support in the future
 *
 * Constructing a Visitor immediately optimizes and walks the tree; the output is available in `result.text`.
 */
export class Visitor {
  /** Accumulated markdown plus trailing-whitespace statistics for the current text */
  public result: VisitorResult
  /** Metadata recorded per visited element */
  public nodeMetadata: NodeMetadataMap = new Map();
  /** URLs collected via addOrGetUrlDefinition; a URL's reference id is its index + 1 */
  public urlDefinitions: string[] = [];
  private options: NodeHtmlMarkdownOptions;

  constructor(
    public instance: NodeHtmlMarkdown,
    public rootNode: HtmlNode,
    public fileName?: string,
  )
  {
    this.result = {
      text: '',
      trailingNewlineStats: {
        whitespace: 0,
        newLines: 0
      }
    };
    this.options = instance.options;

    // Flag nodes with usable content first; visitNode skips anything not flagged
    this.optimizeTree(rootNode);
    this.visitNode(rootNode);
  }

  /* ********************************************************* */
  // region: Methods
  /* ********************************************************* */

  /**
   * Return the 1-based reference id for a URL, registering the URL if it has not been seen yet
   */
  public addOrGetUrlDefinition(url: string): number {
    let id = this.urlDefinitions.findIndex(u => u === url);
    if (id < 0) id = this.urlDefinitions.push(url) - 1;
    return id + 1;
  }

  /**
   * Append text to the result and refresh the trailing-whitespace statistics
   * @param s - Text to append
   * @param startPos - If provided, the result is first truncated to this length (replacing everything after it)
   * @param spaceIfRepeatingChar - Insert a space when the result's last character equals the first character of `s`
   */
  public appendResult(s: string, startPos?: number, spaceIfRepeatingChar?: boolean) {
    // Nothing to write and no truncation requested
    if (!s && startPos === undefined) return;
    const { result } = this;

    if (startPos !== undefined) result.text = result.text.substr(0, startPos);
    result.text += (spaceIfRepeatingChar && result.text.slice(-1) === s[0] ? ' ' : '') + s;

    result.trailingNewlineStats = getTrailingWhitespaceInfo(result.text);
  }

  /**
   * Append only as many newlines as are needed for the result to end with `count` newlines
   */
  public appendNewlines(count: number) {
    const { newLines } = this.result.trailingNewlineStats;
    this.appendResult('\n'.repeat(Math.max(0, (+count - newLines))));
  }

  // endregion

  /* ********************************************************* */
  // region: Internal Methods
  /* ********************************************************* */

  /**
   * Optimize tree, flagging nodes that have usable content
   *
   * Sets `preserve` on every node: true for text nodes, for elements listed in `contentlessElements`, for childless
   * elements whose translator is a factory or has `preserveIfEmpty`, and for any node with a preserved descendant.
   */
  private optimizeTree(node: HtmlNode) {
    perfStart('Optimize tree');
    const { translators } = this.instance;
    (function visit(node: HtmlNode): boolean {
      let res = false
      if (isTextNode(node) || (isElementNode(node) && contentlessElements.includes(node.tagName))) {
        res = true;
      }
      else {
        const childNodes = getChildNodes(node);
        if (!childNodes.length) {
          const translator = translators[(node as ElementNode).tagName];
          if (translator?.preserveIfEmpty || typeof translator === 'function') res = true;
        }
        else
          // Every child is visited (so each gets flagged), even once the result is already known to be true
          for (const child of childNodes) {
            if (!res) res = visit(child);
            else visit(child);
          }
      }
      return node.preserve = res;
    })(node);
    perfStop('Optimize tree');
  }

  /**
   * Apply escaping and custom replacement rules
   *
   * Whitespace runs are collapsed to a single space unless `preserveWhitespace` is set; escaping and `textReplace`
   * are skipped entirely when `noEscape` is set.
   */
  private processText(text: string, metadata: NodeMetadata | undefined) {
    let res = text;
    if (!metadata?.preserveWhitespace) res = res.replace(/\s+/g, ' ');
    if (metadata?.noEscape) return res;

    const { lineStartEscape, globalEscape, textReplace } = this.options;
    res = res
      .replace(globalEscape[0], globalEscape[1])
      .replace(lineStartEscape[0], lineStartEscape[1])

    /* If specified, apply custom replacement patterns */
    if (textReplace)
      for (const [ pattern, r ] of textReplace) res = res.replace(pattern, r);

    return res;
  }

  /**
   * Render a node (and, depending on its translator, its children) into the result
   * @param node - Node to render; ignored unless flagged `preserve` by optimizeTree
   * @param textOnly - When true, only text nodes are rendered and elements are skipped
   * @param metadata - Metadata inherited from the parent element
   */
  public visitNode(node: HtmlNode, textOnly?: boolean, metadata?: NodeMetadata): void {
    const { result } = this;

    if (!node.preserve) return;

    /* Handle text node */
    if (isTextNode(node)) {
      // NOTE(review): `wholeText` is not declared on TextNode here - presumably covers native DOM text nodes, which
      // lack `text` / `trimmedText`; confirm
      if ((node).wholeText) {
        (node).text ??= (node).wholeText;
        (node).trimmedText ??= trimNewLines((node).wholeText);
      }

      // Whitespace-only text becomes a single space, and only if the output does not already end in whitespace
      return node.isWhitespace && !metadata?.preserveWhitespace
             ? (!result.text.length || result.trailingNewlineStats.whitespace > 0) ? void 0 : this.appendResult(' ')
             : this.appendResult(this.processText(metadata?.preserveWhitespace ? node.text : node.trimmedText, metadata));
    }

    if (textOnly || !isElementNode(node)) return;

    /* Handle element node */
    // Translators supplied through metadata (a parent's childTranslators) replace the instance translators
    const translatorCfgOrFactory: TranslatorConfig | TranslatorConfigFactory | undefined =
      metadata?.translators ? metadata.translators[node.tagName] : this.instance.translators[node.tagName];

    /* Update metadata with list detail */
    switch (node.tagName) {
      case 'UL':
      case 'OL':
        // New metadata object per list: numbering restarts and the indent level goes one deeper
        metadata = {
          ...metadata,
          listItemNumber: 0,
          listKind: (node.tagName),
          indentLevel: (metadata?.indentLevel ?? -1) + 1
        };
        break;
      case 'LI':
        // Mutates the metadata object shared with the parent list, so sibling items see the incremented number
        if (metadata?.listKind === 'OL') metadata.listItemNumber = (metadata.listItemNumber ?? 0) + 1;
        break;
      case 'PRE':
        metadata = {
          ...metadata,
          preserveWhitespace: true
        }
        break;
      case 'TABLE':
        metadata = {
          ...metadata,
          tableMeta: {
            node: node
          }
        }
    }
    if (metadata) this.nodeMetadata.set(node, metadata);

    // If no translator for element, visit children
    if (!translatorCfgOrFactory) {
      for (const child of getChildNodes(node)) this.visitNode(child, textOnly, metadata);
      return;
    }

    /* Get Translator Config */
    let cfg: TranslatorConfig;
    let ctx: TranslatorContext | undefined;
    if (!isTranslatorConfig(translatorCfgOrFactory)) {
      // Factory: its result is layered over the factory's `base` config (if any)
      ctx = createTranslatorContext(this, node, metadata, translatorCfgOrFactory.base);
      cfg = { ...translatorCfgOrFactory.base, ...translatorCfgOrFactory(ctx) };
    } else cfg = translatorCfgOrFactory;

    // Skip and don't check children if ignore flag set
    if (cfg.ignore) return;

    /* Update metadata if needed */
    if (cfg.noEscape && !metadata?.noEscape) {
      metadata = { ...metadata, noEscape: cfg.noEscape };
      this.nodeMetadata.set(node, metadata);
    }

    if (cfg.childTranslators && (cfg.childTranslators !== metadata?.translators)) {
      metadata = { ...metadata, translators: cfg.childTranslators }
      this.nodeMetadata.set(node, metadata);
    }

    // Remembered so a RemoveNode post-process result can discard everything written for this node
    const startPosOuter = result.text.length;

    /* Write opening */
    if (cfg.surroundingNewlines) this.appendNewlines(+cfg.surroundingNewlines);
    if (cfg.prefix) this.appendResult(cfg.prefix);

    /* Write inner content */
    if (typeof cfg.content === 'string') this.appendResult(cfg.content, void 0, cfg.spaceIfRepeatingChar);
    else {
      // Start of this node's inner content (after newlines / prefix)
      const startPos = result.text.length;

      // Process child nodes
      for (const child of getChildNodes(node)) this.visitNode(child, (cfg.recurse === false), metadata);

      /* Apply translator post-processing */
      if (cfg.postprocess) {
        const postRes = cfg.postprocess({
          ...(ctx || createTranslatorContext(this, node, metadata)),
          content: result.text.substr(startPos)
        });

        // If remove flag sent, remove / omit everything for this node (prefix, newlines, content, postfix)
        if (postRes === PostProcessResult.RemoveNode) {
          // Undo the numbering increment made for this list item above
          if (node.tagName === 'LI' && metadata?.listItemNumber) --metadata.listItemNumber;
          return this.appendResult('', startPosOuter);
        }

        // A string result replaces the rendered inner content; any other result leaves it unchanged
        if (typeof postRes === 'string') this.appendResult(postRes, startPos, cfg.spaceIfRepeatingChar);
      }
    }

    /* Write closing */
    if (cfg.postfix) this.appendResult(cfg.postfix);
    if (cfg.surroundingNewlines) this.appendNewlines(+cfg.surroundingNewlines);
  }

  // endregion
}

// endregion


/* ****************************************************************************************************************** */
// region: Utilities
/* ****************************************************************************************************************** */

/**
 * Walk a parsed node tree and return the final markdown
 *
 * After the walk, link reference definitions are appended (when `useLinkReferenceDefinitions` is set), excess
 * consecutive newlines are collapsed according to `maxConsecutiveNewlines`, and leading / trailing newlines are
 * trimmed.
 */
export function getMarkdownForHtmlNodes(instance: NodeHtmlMarkdown, rootNode: HtmlNode, fileName?: string): string {
  perfStart('walk');
  const visitor = new Visitor(instance, rootNode, fileName);
  let result = visitor.result.text;
  perfStop('walk');

  /* Post-processing */
  // Add link references, if set
  if (instance.options.useLinkReferenceDefinitions) {
    // Add a newline first if the text does not already end with one
    if (/[^\r\n]/.test(result.slice(-1))) result += '\n';
    visitor.urlDefinitions.forEach((url, idx) => {
      result += `\n[${idx + 1}]: ${url}`;
    });
  }

  // Fixup repeating newlines
  const { maxConsecutiveNewlines } = instance.options;
  if (maxConsecutiveNewlines) result = result.replace(
    new RegExp(String.raw`(?:\r?\n\s*)+((?:\r?\n\s*){${maxConsecutiveNewlines}})`, 'g'),
    '$1'
  );

  return trimNewLines(result);
}

// endregion

--------------------------------------------------------------------------------
/test/default-tags-codeblock.test.ts:
--------------------------------------------------------------------------------
// noinspection HtmlUnknownTarget

import { NodeHtmlMarkdown } from '../src';


/* ****************************************************************************************************************** *
 * Tests
 * ****************************************************************************************************************** */

// Note: Newline handling for block elements within code blocks is not very clean. This can be fixed later.
describe(`Default Tags`, () => {
  let instance: NodeHtmlMarkdown;
  const translateAsBlock = (html: string) => instance.translate(`
${html}
`); 14 | const getExpected = (s: string) => '```\n' + s + '\n```'; 15 | beforeAll(() => { 16 | instance = new NodeHtmlMarkdown(); 17 | }); 18 | 19 | test(`Line Break (br)`, () => { 20 | const res = translateAsBlock(`a
b`); 21 | expect(res).toBe(getExpected(`a\nb`)); 22 | }); 23 | 24 | test(`Horizontal Rule (hr)`, () => { 25 | const res = translateAsBlock(`a
b`); 26 | expect(res).toBe(getExpected(`a\n\n---\n\nb`)); 27 | }); 28 | 29 | test(`Non-processed Elements (b, strong, del, s, strike, em, i, pre, code, blockquote, a)`, () => { 30 | const tags = [ 'b', 'strong', 'del', 's', 'strike', 'em', 'i', 'code', 'a', 'pre', 'blockquote' ]; 31 | const html = tags.map(t => `<${t}>${t}`).join(' '); 32 | const exp = 'b strong del s strike em i code a \n\npre\n\n blockquote\n\n'; 33 | 34 | const res = translateAsBlock(html); 35 | expect(res).toBe(getExpected(exp)); 36 | }); 37 | 38 | test(`Image (img)`, () => { 39 | const res = translateAsBlock(`ab`); 40 | expect(res).toBe(getExpected(`ab`)); 41 | }); 42 | 43 | test(`Headings (h1, h2, h3, h4, h5, h6)`, () => { 44 | let nodes: string[] = []; 45 | for (let i = 1; i < 8; i++) nodes.push(`a`); 46 | const res = translateAsBlock(nodes.join('')); 47 | expect(res).toBe(getExpected('\n[a]\n'.repeat(6) + '\na')); 48 | }); 49 | 50 | // Note: Newline handling here for block elements is unusual 51 | describe(`Lists (ol + li, ul + li)`, () => { 52 | test(`Multi-level Ordered List`, () => { 53 | const res = translateAsBlock(` 54 |
    55 |
  1. a

    b
  2. 56 |
  3. 57 |
  4. b 58 |
    1. c
      d
    59 |
    • e
      f
    60 |
  5. 61 |
62 | `); 63 | expect(res).toBe(getExpected(` \n \n1. a \nb\n \n \n2. b \n \n 1. c \n d \n \n * e \n f\n \n `)); 64 | }); 65 | 66 | test(`Multi-level Unordered List`, () => { 67 | const res = translateAsBlock(` 68 |
    69 |
  • a

    b
  • 70 |
  • 71 |
  • b 72 |
    • c
      d
    73 |
    1. e
      f
    74 |
  • 75 |
76 | `); 77 | expect(res).toBe(getExpected(` \n \n* a \nb\n \n \n* b \n \n * c \n d \n \n 1. e \n f\n \n `)); 78 | }); 79 | }); 80 | 81 | test(`Table`, () => { 82 | const res = translateAsBlock('abc
X
'); 83 | expect(res).toBe(getExpected(`a\nb\nc\n\nX\n\n`)); 84 | }) 85 | }); 86 | -------------------------------------------------------------------------------- /test/default-tags.test.ts: -------------------------------------------------------------------------------- 1 | // noinspection HtmlUnknownTarget 2 | 3 | import { NodeHtmlMarkdown } from '../src'; 4 | 5 | 6 | /* ****************************************************************************************************************** * 7 | * Tests 8 | * ****************************************************************************************************************** */ 9 | 10 | describe(`Default Tags`, () => { 11 | let instance: NodeHtmlMarkdown; 12 | const translate = (html: string) => instance.translate(html); 13 | beforeAll(() => { 14 | instance = new NodeHtmlMarkdown(); 15 | }); 16 | 17 | test(`Line Break (br)`, () => { 18 | const res = translate(`a
b`); 19 | expect(res).toBe(`a \nb`); 20 | }); 21 | 22 | test(`Horizontal Rule (hr)`, () => { 23 | const res = translate(`a
b`); 24 | expect(res).toBe(`a\n\n---\n\nb`); 25 | }); 26 | 27 | test(`Bold (b, strong)`, () => { 28 | const res = translate(`ab

c
d
ab

c
d
`); 29 | const exp = `**a~~b~~** \n \n**c** \n**d**`; 30 | expect(res).toBe(exp + ' ' + exp); 31 | }); 32 | 33 | test(`Strikethrough (del, s, strike)`, () => { 34 | const res = translate(`ab

c
d
ab

c
d
ab

c
d
`); 35 | const exp = `~~a_b_~~ \n \n~~c~~ \n~~d~~`; 36 | expect(res).toBe(exp + ' ' + exp + ' ' + exp); 37 | }); 38 | 39 | test(`Italic / Emphasis (em, i)`, () => { 40 | const res = translate(`a b

c
d
a b

c
d
`); 41 | const exp = `_a ~~b~~_ \n \n_c_ \n_d_`; 42 | expect(res).toBe(exp + ' ' + exp); 43 | }); 44 | 45 | test(`Link (a)`, () => { 46 | const url = 'http://www.github.com/crosstype'; 47 | const specialUrl = 'http://www.github.com/crosstype/**/_test(123)'; 48 | const encodedSpecialUrl = 'http://www.github.com/crosstype/%2A%2A/%5Ftest%28123%29'; 49 | const res = translate(` 50 | a

bc
51 | ab 52 | ${url} 53 | 54 | anestedb 55 | b 56 | `); 57 | expect(res).toBe(`[a b**c**](${url}) a**b** <${url}> [a](${url})[nested](2)![](${url})b **_[b](${encodedSpecialUrl} "a")_** `); 58 | }); 59 | 60 | test(`Image (img)`, () => { 61 | const url = `http://www.github.com/crosstype/` 62 | const res = translate(` 63 | 64 | a2 65 | 66 | a4 67 | 68 | `); 69 | expect(res).toBe(`![](${url}1)` + ` ![](${url}3 "t3")` + ` ![a4](${url}4 "t4") `); 70 | }); 71 | 72 | test(`Pre-formatted Text (pre)`, () => { 73 | const str = `* test \t\n1. test\n\\Test`; 74 | const res = translate(`
${str}
# hello
`); 75 | expect(res).toBe(str + ' \n**# hello**'); 76 | }); 77 | 78 | test(`Block Quote (blockquote)`, () => { 79 | const res = translate(`
a
b
c
def
`); 80 | expect(res).toBe(`> a \n> b \n> c\n> \n>> def`); 81 | }); 82 | 83 | test(`Headings (h1, h2, h3, h4, h5, h6)`, () => { 84 | const res = translate( 85 | `

ab

ab

ab

ab

ab
ab
` 86 | ); 87 | expect(res).toBe(Array.from(Array(6), (v, i) => `#`.repeat(i + 1) + ` a**b**\n\n`).join('').trim()); 88 | }); 89 | 90 | test(`Code (code)`, () => { 91 | const res = translate('```` a \n\nb\n* cd'); 92 | expect(res).toBe('````` ```` a b * c ````` `d`'); 93 | }); 94 | 95 | describe(`Code-block (pre + code)`, () => { 96 | const str = `* test \n\n1. test\n\\Test`; 97 | 98 | test(`Fenced`, () => { 99 | const res = translate(`
${str}
${str}
`); 100 | expect(res).toBe('```fortran\n' + str + '\n```\n\n```\n' + str + '\n```'); 101 | }); 102 | 103 | test(`Indented`, () => { 104 | const originalCodeFence = instance.options.codeBlockStyle; 105 | instance.options.codeBlockStyle = 'indented'; 106 | 107 | const res = translate(`
${str}
${str}
`); 108 | const exp = str.replace(/^/gm, ' '); 109 | expect(res).toBe(exp + '\n\n' + exp); 110 | 111 | instance.options.codeBlockStyle = originalCodeFence; // restore codeBlockStyle, the option this test mutates 112 | }); 113 | }); 114 | 115 | describe(`Lists (ol + li, ul + li)`, () => { 116 | test(`Multi-level Ordered List`, () => { 117 | const res = translate(` 118 |
    119 |
  1. a

    b
  2. 120 |
  3. 121 |
  4. b 122 |
    1. c
      d
    123 |
    • e
      f
    124 |
  5. 125 |
126 | `); 127 | expect(res).toBe(`1. a \n \n~~b~~\n2. b \n 1. c \n d \n * e \n f`); 128 | }); 129 | 130 | test(`Multi-level Unordered List`, () => { 131 | const res = translate(` 132 |
    133 |
  • a

    b
  • 134 |
  • 135 |
  • b 136 |
    • c
      d
    137 |
    1. e
      f
    138 |
  • 139 |
140 | `); 141 | expect(res).toBe(`* a \n \n~~b~~\n* b \n * c \n d \n 1. e \n f`); 142 | }); 143 | 144 | test(`List item with block content`, () => { 145 | const res = translate(`
  • a`); 146 | expect(res).toBe(`* ![](hello.jpg) \na`); 147 | }); 148 | }); 149 | }); 150 | -------------------------------------------------------------------------------- /test/options.test.ts: -------------------------------------------------------------------------------- 1 | // noinspection RegExpUnnecessaryNonCapturingGroup,HtmlUnknownTarget 2 | 3 | import { NodeHtmlMarkdown } from '../src'; 4 | 5 | 6 | /* ****************************************************************************************************************** * 7 | * Options Tests 8 | * ****************************************************************************************************************** */ 9 | 10 | describe(`Options`, () => { 11 | let instance: NodeHtmlMarkdown; 12 | const translate = (html: string) => instance.translate(html); 13 | beforeAll(() => { 14 | instance = new NodeHtmlMarkdown(); 15 | }); 16 | 17 | test(`codeFence`, () => { 18 | const originalCodeFence = instance.options.codeFence; 19 | const str = `* test \n\n1. test\n\\Test`; 20 | const html = `
    ${str}
    `; 21 | 22 | const resDefaultFence = translate(html); 23 | expect(resDefaultFence).toBe('```fortran\n' + str + '\n```'); 24 | 25 | instance.options.codeFence = `+++++`; 26 | const resFencePlus = translate(html); 27 | expect(resFencePlus).toBe('+++++fortran\n' + str + '\n+++++'); 28 | 29 | instance.options.codeFence = `?`; 30 | const resFence1Char = translate(html); 31 | expect(resFence1Char).toBe('?fortran\n' + str + '\n?'); 32 | 33 | instance.options.codeFence = originalCodeFence; 34 | }); 35 | 36 | test(`bulletMarker`, () => { 37 | const originalBulletMarker = instance.options.bulletMarker; 38 | const html = `
    • item1
    • item2
    `; 39 | 40 | const resDefaultMarker = translate(html); 41 | expect(resDefaultMarker).toBe(`* item1 42 | * item2`); 43 | 44 | instance.options.bulletMarker = '-'; 45 | const resDashMarker = translate(html); 46 | expect(resDashMarker).toBe(`- item1 47 | - item2`); 48 | 49 | instance.options.bulletMarker = '<->'; 50 | const resWideMarker = translate(html); 51 | expect(resWideMarker).toBe(`<-> item1 52 | <-> item2`); 53 | instance.options.bulletMarker = originalBulletMarker; 54 | }); 55 | 56 | test(`codeBlockStyle`, () => { 57 | const originalCodeFence = instance.options.codeBlockStyle; 58 | const html = `
    line1\nline2
    `; 59 | 60 | instance.options.codeBlockStyle = 'fenced'; 61 | const resFenced = translate(html); 62 | expect(resFenced).toBe('```\nline1\nline2\n```'); 63 | 64 | instance.options.codeBlockStyle = 'indented'; 65 | const resIndented = translate(html); 66 | expect(resIndented).toBe('line1\nline2'.replace(/^/gm, ' ')); 67 | 68 | instance.options.codeBlockStyle = originalCodeFence; // restore codeBlockStyle, the option this test mutates 69 | }); 70 | 71 | test(`emDelimiter`, () => { 72 | const originalEmDelimiter = instance.options.emDelimiter; 73 | const html = `some textmore text`; 74 | 75 | const resDefaultEmDelimiter = translate(html); 76 | expect(resDefaultEmDelimiter).toBe(`_some text_ _more text_`); 77 | 78 | instance.options.emDelimiter = '|'; 79 | const resShortEmDelimiter = translate(`some textmore text`); 80 | expect(resShortEmDelimiter).toBe(`|some text| |more text|`); 81 | 82 | instance.options.emDelimiter = '+++'; 83 | const resWideEmDelimiter = translate(`some textmore text`); 84 | expect(resWideEmDelimiter).toBe(`+++some text+++ +++more text+++`); 85 | instance.options.emDelimiter = originalEmDelimiter; 86 | }); 87 | 88 | test(`strongDelimiter`, () => { 89 | const originalStrongDelimiter = instance.options.strongDelimiter; 90 | const html = `some textmore text`; 91 | 92 | const resDefaultStrongDelimiter = translate(html); 93 | expect(resDefaultStrongDelimiter).toBe(`**some text** **more text**`); 94 | 95 | instance.options.strongDelimiter = '|'; 96 | const resShortStrongDelimiter = translate(html); 97 | expect(resShortStrongDelimiter).toBe(`|some text| |more text|`); 98 | 99 | instance.options.strongDelimiter = '+++'; 100 | const resWideStrongDelimiter = translate(html); 101 | expect(resWideStrongDelimiter).toBe(`+++some text+++ +++more text+++`); 102 | instance.options.strongDelimiter = originalStrongDelimiter; 103 | }); 104 | 105 | 106 | test(`strikeDelimiter`, () => { 107 | const originalStrikeDelimiter = instance.options.strikeDelimiter; 108 | const html = `some textmore textone more text`; 109 | 110 |
const resDefaultStrikeDelimiter = translate(html); 111 | expect(resDefaultStrikeDelimiter).toBe(`~~some text~~ ~~more text~~ ~~one more text~~`); 112 | 113 | instance.options.strikeDelimiter = '~'; 114 | const resShortStrikeDelimiter = translate(html); 115 | expect(resShortStrikeDelimiter).toBe(`~some text~ ~more text~ ~one more text~`); 116 | 117 | instance.options.strikeDelimiter = '+++'; 118 | const resWideStrikeDelimiter = translate(html); 119 | expect(resWideStrikeDelimiter).toBe(`+++some text+++ +++more text+++ +++one more text+++`); 120 | instance.options.strikeDelimiter = originalStrikeDelimiter; 121 | }); 122 | 123 | test(`ignore`, () => { 124 | const strongEmHTML = `some textmore text`; 125 | 126 | const instanceIgnore = new NodeHtmlMarkdown({ 127 | ignore: ['STRONG'] 128 | }); 129 | const resNoStrong = instanceIgnore.translate(strongEmHTML); 130 | expect(resNoStrong).toBe(`_more text_`); 131 | 132 | const instanceIgnoreEm = new NodeHtmlMarkdown({ 133 | ignore: ['EM'] 134 | }); 135 | const resNoEm = instanceIgnoreEm.translate(strongEmHTML); 136 | expect(resNoEm).toBe(`**some text**`); 137 | 138 | const instanceIgnoreBoth = new NodeHtmlMarkdown({ 139 | ignore: ['EM', 'STRONG'] 140 | }); 141 | const resNoEmStrong = instanceIgnoreBoth.translate(strongEmHTML); 142 | expect(resNoEmStrong).toBe(``); 143 | 144 | const instanceIgnoreMiss = new NodeHtmlMarkdown({ 145 | ignore: ['UL', 'H1'] 146 | }); 147 | const resWithAll = instanceIgnoreMiss.translate(strongEmHTML); 148 | expect(resWithAll).toBe(`**some text**_more text_`); 149 | }); 150 | 151 | test(`blockElements`, () => { 152 | const html = `xyyyxtext`; 153 | const instanceStrongBlock = new NodeHtmlMarkdown({ 154 | blockElements: ['STRONG'] 155 | }); 156 | const resStrongBlock = instanceStrongBlock.translate(html); 157 | expect(resStrongBlock).toBe(`_x_ 158 | 159 | **yyy** 160 | 161 | _x_text`); 162 | 163 | const instanceEmBlock = new NodeHtmlMarkdown({ 164 | blockElements: ['EM'] 165 | }); 166 | const 
resEmBlock = instanceEmBlock.translate(html); 167 | expect(resEmBlock).toBe(`_x_ 168 | 169 | **yyy** 170 | 171 | _x_ 172 | 173 | text`); 174 | }); 175 | 176 | test(`maxConsecutiveNewlines`, () => { 177 | const originalMaxConsecutiveNewlines = instance.options.maxConsecutiveNewlines; 178 | const html = `text${'
    '.repeat(10)}something`; 179 | 180 | const resDefaultMaxNewLines = translate(html); 181 | expect(resDefaultMaxNewLines).toBe(`**text**${' \n'.repeat(3)}_something_`); 182 | 183 | instance.options.maxConsecutiveNewlines = 5; 184 | const res5MaxNewLines = translate(html); 185 | expect(res5MaxNewLines).toBe(`**text**${' \n'.repeat(5)}_something_`); 186 | 187 | instance.options.maxConsecutiveNewlines = 10; 188 | const res10MaxNewLines = translate(html); 189 | expect(res10MaxNewLines).toBe(`**text**${' \n'.repeat(10)}_something_`); 190 | 191 | instance.options.maxConsecutiveNewlines = originalMaxConsecutiveNewlines; 192 | }); 193 | 194 | test(`lineStartEscape`, () => { 195 | const originalLineStartEscape = instance.options.lineStartEscape; 196 | 197 | const resEscapedPlus = translate(`

    text
    + text
    + more text

    `); 198 | expect(resEscapedPlus).toBe("text \n\\+ text \n\\+ more text"); 199 | 200 | const resEscapedQuote = translate(`

    text
    > text
    > more text

    `); 201 | expect(resEscapedQuote).toBe("text \n\\> text \n\\> more text"); 202 | 203 | // No escape for + 204 | instance.options.lineStartEscape = [/^(\s*?)((?:[=>-])|(?:#{1,6}\s))|(?:(\d+)(\.\s))/gm, '$1$3\\$2$4']; 205 | 206 | const resNotEscapedPlus = translate(`

    text
    + text
    + more text

    `); 207 | expect(resNotEscapedPlus).toBe("text \n+ text \n+ more text"); 208 | 209 | // No escape also for > 210 | instance.options.lineStartEscape = [/^(\s*?)((?:#{1,6}\s))|(?:(\d+)(\.\s))/gm, '$1$3\\$2$4']; 211 | 212 | const resNotEscapedQuote = translate(`

    text
    > text
    > more text

    `); 213 | expect(resNotEscapedQuote).toBe("text \n> text \n> more text"); 214 | 215 | instance.options.lineStartEscape = originalLineStartEscape; 216 | }); 217 | 218 | test(`globalEscape`, () => { 219 | const originalGlobalEscape = instance.options.globalEscape; 220 | 221 | const resEscapedStar = translate(`text**text`); 222 | expect(resEscapedStar).toBe("**text\\*\\*text**"); 223 | 224 | // No escape for star 225 | instance.options.globalEscape = [ /[_~\[\]]/gm, '\\$&' ]; 226 | 227 | const resNotEscapedStar = translate(`text**text`); 228 | expect(resNotEscapedStar).toBe("_text**text_"); 229 | 230 | const resEscapedBrackets = translate(`

    title [more words]

    `); 231 | expect(resEscapedBrackets).toBe("# title \\[more words\\]"); 232 | 233 | // No escape also for brackets 234 | instance.options.globalEscape = [ /[_~]/gm, '\\$&' ]; 235 | const resNotEscapedBrackets = translate(`

    title [more words]

    `); 236 | expect(resNotEscapedBrackets).toBe("# title [more words]"); 237 | 238 | instance.options.globalEscape = originalGlobalEscape; 239 | }); 240 | 241 | test(`textReplace`, () => { 242 | const originalReplace = instance.options.textReplace; 243 | 244 | instance.options.textReplace = [[/abc/g, "xyz"]]; 245 | const replaced = translate('

    hello abc

    '); 246 | expect(replaced).toBe(`# hello xyz`); 247 | 248 | instance.options.textReplace = [[/hello/g, "X"]]; 249 | const replaced2 = translate('

    hello abc

    '); 250 | expect(replaced2).toBe(`# X abc`); 251 | 252 | instance.options.textReplace = originalReplace; 253 | }); 254 | 255 | test(`keepDataImages`, () => { 256 | const originalKeepDataImages = instance.options.keepDataImages; 257 | 258 | instance.options.keepDataImages = true; 259 | const resKeep = translate(`normal 260 | `); 261 | expect(resKeep).toBe(`![normal](normal_img.jpg) ![](data:image/gif;base64,R0lGODlhEA)`); 262 | 263 | instance.options.keepDataImages = false; 264 | const resNoKeep = translate(`normal 265 | `); 266 | expect(resNoKeep).toBe(`![normal](normal_img.jpg) `); 267 | 268 | instance.options.keepDataImages = originalKeepDataImages; 269 | }); 270 | 271 | test(`useLinkReferenceDefinitions`, () => { 272 | const originalUseLinkReferenceDefinitions = instance.options.useLinkReferenceDefinitions; 273 | 274 | const url = 'http://www.github.com/crosstype'; 275 | const html = `Hello:  276 | a

    bc
    277 | ab 278 | link2 279 | repeat link 280 | ${url} Goodbye! 281 | `; 282 | 283 | instance.options.useLinkReferenceDefinitions = false; 284 | let res = translate(html); 285 | expect(res).toBe( 286 | `Hello: [a b**c**](${url}) a**b** [link2](${url}/other) [repeat link](${url}) <${url}> Goodbye!` 287 | ); 288 | 289 | instance.options.useLinkReferenceDefinitions = true; 290 | res = translate(html); 291 | expect(res).toBe( 292 | `Hello: [a b**c**][1] a**b** [link2][2] [repeat link][1] <${url}> Goodbye!\n\n[1]: ${url}\n[2]: ${url}/other` 293 | ); 294 | 295 | instance.options.useLinkReferenceDefinitions = originalUseLinkReferenceDefinitions; 296 | }); 297 | 298 | test(`useInlineLinks`, () => { 299 | const originalUseInlineLinksDefinitions = instance.options.useInlineLinks; 300 | 301 | const url = 'http://www.github.com/crosstype'; 302 | const html = `Hello:  303 | ${url}   304 | ab 305 | link2 306 | repeat link Goodbye! 307 | `; 308 | 309 | instance.options.useInlineLinks = false; 310 | let res = translate(html); 311 | expect(res).toBe(`Hello: [${url}](${url}) a**b** [link2](${url}/other) [repeat link](${url}) Goodbye!`); 312 | 313 | instance.options.useInlineLinks = true; 314 | res = translate(html); 315 | expect(res).toBe( 316 | `Hello: <${url}> a**b** [link2](${url}/other) [repeat link](${url}) Goodbye!` 317 | ); 318 | 319 | instance.options.useInlineLinks = originalUseInlineLinksDefinitions; // restore useInlineLinks, the option this test mutates 320 | }); 321 | }); 322 | -------------------------------------------------------------------------------- /test/special-cases.test.ts: -------------------------------------------------------------------------------- 1 | import { NodeHtmlMarkdown } from '../src'; 2 | 3 | 4 | /* ****************************************************************************************************************** * 5 | * Config 6 | * ****************************************************************************************************************** */ 7 | 8 | const textFormatTags = [
'strong', 'b', 'del', 's', 'strike', 'em', 'i' ] as const; 9 | const getDelims = (instance: NodeHtmlMarkdown) => Object.fromEntries(textFormatTags.map(t => [ 10 | t, 11 | (() => { 12 | switch (t) { 13 | case 'strong': 14 | case 'b': 15 | return instance.options.strongDelimiter; 16 | case 'del': 17 | case 's': 18 | case 'strike': 19 | return instance.options.strikeDelimiter; 20 | case 'em': 21 | case 'i': 22 | return instance.options.emDelimiter; 23 | } 24 | })() 25 | ])); 26 | 27 | 28 | /* ****************************************************************************************************************** * 29 | * Tests 30 | * ****************************************************************************************************************** */ 31 | 32 | describe(`Special Cases`, () => { 33 | let instance: NodeHtmlMarkdown; 34 | let delims: ReturnType; 35 | const translate = (html: string) => instance.translate(html); 36 | beforeAll(() => { 37 | instance = new NodeHtmlMarkdown(); 38 | delims = getDelims(instance); 39 | }); 40 | 41 | test(`Removes uncaught Doctype`, () => { 42 | const res = translate(`abc`); 43 | expect(res).toBe(`abc`); 44 | }); 45 | 46 | describe(`Whitespace handled for leading / trailing whitespace in tags`, () => { 47 | test.each(textFormatTags)(`%s`, tag => { 48 | const delim = delims[tag]; 49 | 50 | expect(translate(`

    <${tag}>  Label:  Value

    `)).toBe(` ${delim}Label:${delim} Value`); 51 | expect(translate(`

    <${tag}>  Label:  Value

    `)).toBe(` ${delim}Label:${delim} Value`); 52 | }); 53 | }); 54 | 55 | // See: https://github.com/crosstype/node-html-markdown/issues/18 56 | describe(`Removes nested text formatting tags`, () => { 57 | test.each(textFormatTags)(`%s`, tag => { 58 | const delim = delims[tag]; 59 | 60 | expect(translate(`<${tag}>My <${tag}>bold text`)).toBe( 61 | `${delim}My bold text${delim}` 62 | ); 63 | }); 64 | }); 65 | 66 | // See: https://github.com/crosstype/node-html-markdown/issues/16 67 | // See: https://github.com/crosstype/node-html-markdown/issues/21 68 | test(`Handles whitespace with single space`, () => { 69 | const res = translate(`test test2 \ntest3\r\n\r\n\t\t\ttest4\ttest5\r\n\n\n\t\ttest6`); 70 | expect(res).toBe(`test test2 test3 test4 test5 test6`); 71 | }); 72 | 73 | // See: https://github.com/crosstype/node-html-markdown/issues/19 74 | test(`Childless nodes visited if preserveIfEmpty set`, () => { 75 | const html = `Hello