├── .clang-format ├── .clangd ├── .editorconfig ├── .gitignore ├── CMakeLists.txt ├── Doxyfile ├── LICENSE ├── README.md ├── docs └── html │ ├── annotated.html │ ├── bc_s.png │ ├── bdwn.png │ ├── classes.html │ ├── classjis__enc-members.html │ ├── classjis__enc.html │ ├── classjis__enc.png │ ├── classshift__jis-members.html │ ├── classshift__jis.html │ ├── classshift__jis.png │ ├── closed.png │ ├── dir_68267d1309a1af8e8297ef4c3efbcdba.html │ ├── dir_d44c64559bbebec7f509842c48db8b23.html │ ├── doc.png │ ├── doxygen.css │ ├── doxygen.png │ ├── dynsections.js │ ├── files.html │ ├── folderclosed.png │ ├── folderopen.png │ ├── functions.html │ ├── functions_func.html │ ├── functions_vars.html │ ├── hierarchy.html │ ├── index.html │ ├── jis__enc_8h_source.html │ ├── jquery.js │ ├── main_8h_source.html │ ├── menu.js │ ├── menudata.js │ ├── nav_f.png │ ├── nav_g.png │ ├── nav_h.png │ ├── open.png │ ├── search │ ├── all_0.html │ ├── all_0.js │ ├── all_1.html │ ├── all_1.js │ ├── all_2.html │ ├── all_2.js │ ├── all_3.html │ ├── all_3.js │ ├── all_4.html │ ├── all_4.js │ ├── all_5.html │ ├── all_5.js │ ├── all_6.html │ ├── all_6.js │ ├── all_7.html │ ├── all_7.js │ ├── all_8.html │ ├── all_8.js │ ├── classes_0.html │ ├── classes_0.js │ ├── classes_1.html │ ├── classes_1.js │ ├── classes_2.html │ ├── classes_2.js │ ├── close.png │ ├── functions_0.html │ ├── functions_0.js │ ├── functions_1.html │ ├── functions_1.js │ ├── functions_2.html │ ├── functions_2.js │ ├── functions_3.html │ ├── functions_3.js │ ├── mag_sel.png │ ├── nomatches.html │ ├── pages_0.html │ ├── pages_0.js │ ├── search.css │ ├── search.js │ ├── search_l.png │ ├── search_m.png │ ├── search_r.png │ ├── searchdata.js │ ├── variables_0.html │ ├── variables_0.js │ ├── variables_1.html │ ├── variables_1.js │ ├── variables_2.html │ ├── variables_2.js │ ├── variables_3.html │ ├── variables_3.js │ ├── variables_4.html │ └── variables_4.js │ ├── shift__jis_8h_source.html │ ├── splitbar.png │ ├── 
structfound__string-members.html │ ├── structfound__string.html │ ├── sync_off.png │ ├── sync_on.png │ ├── tab_a.png │ ├── tab_b.png │ ├── tab_h.png │ ├── tab_s.png │ └── tabs.css ├── inc ├── enc_cp932.hpp ├── enc_eucjp.hpp ├── enc_shiftjis.hpp ├── enc_validator.hpp ├── filesys.hpp ├── fstreams.hpp ├── iconv_encoding.hpp ├── jis_validator.hpp ├── jstrings.hpp ├── types.hpp ├── uniconv.hpp └── usage.hpp └── src ├── app.hpp ├── app.hpp.cfg ├── enc_cp932.cpp ├── enc_eucjp.cpp ├── enc_shiftjis.cpp ├── jstrings.cpp ├── main.cpp └── usage.cpp /.clang-format: -------------------------------------------------------------------------------- 1 | Language: Cpp 2 | BasedOnStyle: LLVM 3 | 4 | AlignAfterOpenBracket: DontAlign 5 | AlignEscapedNewlines: DontAlign 6 | AlignOperands: Align 7 | AlignTrailingComments: true 8 | AllowAllArgumentsOnNextLine: true 9 | AllowAllConstructorInitializersOnNextLine: true 10 | AlignArrayOfStructures: None 11 | AllowShortBlocksOnASingleLine: false 12 | AllowShortCaseLabelsOnASingleLine: false 13 | AllowShortFunctionsOnASingleLine: Empty 14 | AllowShortLoopsOnASingleLine: false 15 | AllowShortIfStatementsOnASingleLine: Never 16 | AlwaysBreakBeforeMultilineStrings: true 17 | AlwaysBreakTemplateDeclarations: Yes 18 | BreakBeforeBraces: Allman 19 | BreakBeforeTernaryOperators: true 20 | BreakConstructorInitializers: AfterColon 21 | BinPackArguments: false 22 | BinPackParameters: false 23 | ColumnLimit: 120 24 | ContinuationIndentWidth: 2 25 | Cpp11BracedListStyle: true 26 | Language: Cpp 27 | MaxEmptyLinesToKeep: 1 28 | IndentCaseLabels: true 29 | PackConstructorInitializers: Never 30 | PointerAlignment: Middle 31 | ReferenceAlignment: Pointer 32 | SortIncludes: true 33 | SpaceAfterCStyleCast: true 34 | SpaceAfterLogicalNot: true 35 | SpaceAroundPointerQualifiers: Both 36 | SpaceBeforeAssignmentOperators: true 37 | SpaceBeforeCpp11BracedList: true 38 | SpaceBeforeCtorInitializerColon: true 39 | SpaceBeforeInheritanceColon: true 40 | 
SpaceBeforeRangeBasedForLoopColon: true 41 | SpaceBeforeParens: ControlStatements 42 | SpacesInAngles: false 43 | SpacesInCStyleCastParentheses: false 44 | SpacesInParentheses: false 45 | SpacesInSquareBrackets: false 46 | Standard: Cpp11 47 | TabWidth: 2 48 | UseTab: Always 49 | -------------------------------------------------------------------------------- /.clangd: -------------------------------------------------------------------------------- 1 | CompileFlags: 2 | # Treat code as C++, use C++17 standard, enable more warnings. 3 | Add: [-xc++, -std=c++17, -Wall] 4 | Diagnostics: 5 | ClangTidy: 6 | Add: [performance*, modernize*] 7 | Remove: [modernize-use-trailing-return-type, modernize-avoid-c-arrays] 8 | CheckOptions: 9 | readability-identifier-naming.VariableCase: SnakeCase 10 | 11 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | [*] 2 | end_of_line = lf 3 | insert_final_newline = true 4 | charset = utf-8 5 | trim_trailing_whitespace = true 6 | 7 | [*.{c,h,cpp,hpp}] 8 | indent_style = tab 9 | indent_size = 2 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .cache/ 2 | .vscode 3 | *.code-workspace 4 | bin 5 | build 6 | etc 7 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include(CheckIncludeFiles) 2 | 3 | # define project 4 | cmake_minimum_required(VERSION 3.5) 5 | 6 | project(jstrings VERSION 1.6 LANGUAGES CXX) 7 | 8 | set(PROJECT_CONTACT "Damian R (damian@motoi.pro)") 9 | set(PROJECT_WEBSITE "https://github.com/drojaazu") 10 | set(PROJECT_COPYRIGHT "©2018 Motoi Productions / Released under MIT License") 11 | set(PROJECT_BRIEF "A tool for finding JIS-based 
Japanese text in binary data.") 12 | 13 | configure_file("${CMAKE_CURRENT_SOURCE_DIR}/src/app.hpp.cfg" "${CMAKE_CURRENT_SOURCE_DIR}/src/app.hpp" ESCAPE_QUOTES) 14 | 15 | set(CMAKE_CXX_STANDARD 17) 16 | set(CMAKE_CXX_COMPILER_NAMES clang++ g++ icpc c++ cxx) 17 | set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DDEBUG") 18 | 19 | if(NOT EXISTS ${CMAKE_BINARY_DIR}/CMakeCache.txt) 20 | if(NOT CMAKE_BUILD_TYPE) 21 | set(CMAKE_BUILD_TYPE "Release" CACHE STRING "" FORCE) 22 | endif() 23 | endif() 24 | 25 | # define target 26 | aux_source_directory("${CMAKE_CURRENT_SOURCE_DIR}/src" SRCFILES) 27 | include_directories("${CMAKE_CURRENT_SOURCE_DIR}/inc") 28 | 29 | add_executable(${PROJECT_NAME} ${SRCFILES}) 30 | 31 | target_compile_features(${PROJECT_NAME} PUBLIC cxx_std_17) 32 | 33 | find_library(ICONV_LIB iconv) 34 | 35 | if(NOT ICONV_LIB) 36 | message(FATAL_ERROR "libiconv not found") 37 | endif() 38 | 39 | target_link_libraries(${PROJECT_NAME} iconv) 40 | 41 | install(TARGETS jstrings 42 | RUNTIME DESTINATION bin) 43 | -------------------------------------------------------------------------------- /Doxyfile: -------------------------------------------------------------------------------- 1 | # Doxyfile 1.8.13 2 | DOXYFILE_ENCODING = UTF-8 3 | PROJECT_NAME = jstrings 4 | PROJECT_NUMBER = 1 5 | PROJECT_BRIEF = "A tool for finding JIS-based character strings in binary streams" 6 | OUTPUT_DIRECTORY = docs 7 | TAB_SIZE = 2 8 | USE_MDFILE_AS_MAINPAGE = README.md 9 | GENERATE_LATEX = NO 10 | GENERATE_HTML = YES 11 | 12 | INPUT = README.md src include 13 | FILE_PATTERNS = *.cpp *.h 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018-2019 Damian Rogers 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to 
deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # jstrings 2 | 3 | A tool for finding JIS encoded Japanese text in binary data. 4 | 5 | ## Usage 6 | 7 | jstrings [options] [input_file] 8 | 9 | Input can be a filename or data from stdin. Output is sent to stdout. 10 | 11 | ### Options 12 | 13 | -e encoding 14 | --encoding encoding 15 | 16 | Specify the encoding to use. Use one of the strings listed in parentheses below for that encoding: 17 | 18 | * Shift-JIS (shift-jis, shiftjis, sjis) 19 | * EUC-JP (euc, euc-jp, eucjp) 20 | * Microsoft CP932 (cp932, windows932, windows31j) 21 | 22 | Optional; default is Shift-JIS. 23 | 24 | -l value 25 | --match-length value 26 | 27 | Set number of consecutive characters required to be considered a valid string. 28 | 29 | Optional; default is 5. 30 | 31 | -c value 32 | --cutoff value 33 | 34 | Limit the output to the specified number of characters for a string. 
This is useful for "previewing" a file which may have large blocks of junk data that happen to fall within the range of valid code points. Strings that are cut off will be appended with an ellipsis. 35 | 36 | Note that the length is in bytes, not characters. As such, due to the variable width nature of UTF-8, there is a chance the final character displayed may be incorrect. STL string functions do not work natively with encodings and the author feels that the work needed to implement this for an optional feature that should only be used for quickly previewing data would be overly complex. 37 | 38 | Optional; default is no cutoff. 39 | 40 | -m 41 | --multiline 42 | 43 | Include newline characters (0x0D or 0x0D0A) as valid. Otherwise, these will count as end of string markers. 44 | 45 | Optional; default is disabled. 46 | 47 | -r 48 | --raw 49 | 50 | Output the data in its original encoding without converting to Unicode. 51 | 52 | Optional; default is disabled (will convert output strings to UTF-8 using libiconv). 53 | 54 | -s 55 | --skip-jis0201 56 | 57 | Skip checking for JIS X 0201 characters. This is an 8-bit katakana-only code space that acts as a supplement to ASCII and was generally only used in older (early to mid 1980s) home computers. Disabling this can reduce false positives if you are working with newer data. 58 | 59 | Optional; default is disabled (will include JIS X 0201 code points as valid matches). 60 | 61 | ## Output 62 | 63 | Found strings are prepended with the offset at which they were found in the original data and sent to stdout. Strings are converted to UTF-8 using libiconv. The original encoding can be preserved by using the `--raw` option. 64 | 65 | ## Building 66 | 67 | CMake is used for the build system. From the root directory: 68 | 69 | mkdir build && cd build 70 | cmake .. 
71 | make 72 | sudo make install 73 | -------------------------------------------------------------------------------- /docs/html/annotated.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | jstrings: Class List 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 |
19 |
20 | 21 | 22 | 23 | 29 | 30 | 31 |
24 |
jstrings 25 |  1 26 |
27 |
A tool for finding JIS-based character strings in binary streams
28 |
32 |
33 | 34 | 35 | 38 | 39 | 40 | 46 | 47 |
48 | 49 |
53 |
54 | 55 | 56 |
57 | 60 |
61 | 62 |
63 |
64 |
Class List
65 |
66 |
67 |
Here are the classes, structs, unions and interfaces with brief descriptions:
68 | 69 | 70 | 71 | 72 |
 Cfound_stringPOD structure for containing a found string
 Cjis_encAbstract class for JIS based encoding classes
 Cshift_jisFind Shift-JIS encoded strings in a byte stream
73 |
74 |
75 | 76 | 81 | 82 | 83 | -------------------------------------------------------------------------------- /docs/html/bc_s.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drojaazu/jstrings/bdeef30593790062d99d8b64d0b80e7b69f0b08b/docs/html/bc_s.png -------------------------------------------------------------------------------- /docs/html/bdwn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drojaazu/jstrings/bdeef30593790062d99d8b64d0b80e7b69f0b08b/docs/html/bdwn.png -------------------------------------------------------------------------------- /docs/html/classes.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | jstrings: Class Index 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 |
19 |
20 | 21 | 22 | 23 | 29 | 30 | 31 |
24 |
jstrings 25 |  1 26 |
27 |
A tool for finding JIS-based character strings in binary streams
28 |
32 |
33 | 34 | 35 | 38 | 39 | 40 | 46 | 47 |
48 | 49 |
53 |
54 | 55 | 56 |
57 | 60 |
61 | 62 |
63 |
64 |
Class Index
65 |
66 |
67 |
f | j | s
68 | 69 | 73 | 74 | 75 | 76 |
  f  
70 |
  j  
71 |
  s  
72 |
found_string   jis_enc   shift_jis   
77 |
f | j | s
78 |
79 | 80 | 85 | 86 | 87 | -------------------------------------------------------------------------------- /docs/html/classjis__enc-members.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | jstrings: Member List 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 |
19 |
20 | 21 | 22 | 23 | 29 | 30 | 31 |
24 |
jstrings 25 |  1 26 |
27 |
A tool for finding JIS-based character strings in binary streams
28 |
32 |
33 | 34 | 35 | 38 | 39 | 40 | 46 | 47 | 48 |
52 |
53 | 54 | 55 |
56 | 59 |
60 | 61 |
62 |
63 |
64 |
jis_enc Member List
65 |
66 |
67 | 68 |

This is the complete list of members for jis_enc, including all inherited members.

69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 |
find()=0jis_encpure virtual
get_is_big_endian()jis_enc
get_min_len()jis_enc
get_use_jisx0213()jis_enc
instream (defined in jis_enc)jis_encprotected
is_big_endianjis_encprotected
jis_enc(std::istream *instream) (defined in jis_enc)jis_enc
min_lenjis_encprotected
set_is_big_endian(bool is_big_endian)jis_enc
set_min_len(size_t min_len)jis_enc
set_use_jisx0213(bool use_jisx0213)jis_enc
use_jisx0213jis_encprotected
~jis_enc() (defined in jis_enc)jis_encvirtual
84 | 85 | 90 | 91 | 92 | -------------------------------------------------------------------------------- /docs/html/classjis__enc.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | jstrings: jis_enc Class Reference 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 |
19 |
20 | 21 | 22 | 23 | 29 | 30 | 31 |
24 |
jstrings 25 |  1 26 |
27 |
A tool for finding JIS-based character strings in binary streams
28 |
32 |
33 | 34 | 35 | 38 | 39 | 40 | 46 | 47 | 48 |
52 |
53 | 54 | 55 |
56 | 59 |
60 | 61 |
62 |
63 |
64 | Public Member Functions | 65 | Protected Attributes | 66 | List of all members
67 |
68 |
jis_enc Class Referenceabstract
69 |
70 |
71 | 72 |

Abstract class for JIS based encoding classes. 73 | More...

74 | 75 |

#include <jis_enc.h>

76 |
77 | Inheritance diagram for jis_enc:
78 |
79 |
80 | 81 | 82 | shift_jis 83 | 84 |
85 | 86 | 88 | 90 | 91 | 93 | 94 | 95 | 97 | 98 | 99 | 101 | 102 | 103 | 105 | 106 | 107 | 109 | 110 | 111 | 113 | 114 | 115 | 117 | 118 | 119 |

87 | Public Member Functions

89 |  jis_enc (std::istream *instream)
 
92 | virtual std::vector< found_string > * find ()=0
 Performs the search of the data stream.
 
96 | void set_min_len (size_t min_len)
 Setter for min_len.
 
100 | size_t get_min_len ()
 Getter for min_len.
 
104 | void set_is_big_endian (bool is_big_endian)
 Setter for is_big_endian.
 
108 | bool get_is_big_endian ()
 Getter for is_big_endian.
 
112 | void set_use_jisx0213 (bool use_jisx0213)
 Setter for use_jisx0213.
 
116 | bool get_use_jisx0213 ()
 Getter for use_jisx0213.
 
120 | 122 | 124 | 125 | 127 | 128 | 129 | 131 | 132 | 133 | 135 | 136 | 137 |

121 | Protected Attributes

123 | std::istream * instream
 
126 | size_t min_len = 10
 Minimum number of characters to match to count as a found string.
 
130 | bool is_big_endian = true
 Determines byte order for multibyte characters.
 
134 | bool use_jisx0213 = false
 Use the JIS X 0213 character set instead of JIS X 0208.
 
138 |

Detailed Description

139 |

Abstract class for JIS based encoding classes.

140 |

The documentation for this class was generated from the following files: 144 |
145 | 146 | 151 | 152 | 153 | -------------------------------------------------------------------------------- /docs/html/classjis__enc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drojaazu/jstrings/bdeef30593790062d99d8b64d0b80e7b69f0b08b/docs/html/classjis__enc.png -------------------------------------------------------------------------------- /docs/html/classshift__jis-members.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | jstrings: Member List 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 |
19 |
20 | 21 | 22 | 23 | 29 | 30 | 31 |
24 |
jstrings 25 |  1 26 |
27 |
A tool for finding JIS-based character strings in binary streams
28 |
32 |
33 | 34 | 35 | 38 | 39 | 40 | 46 | 47 | 48 |
52 |
53 | 54 | 55 |
56 | 59 |
60 | 61 |
62 |
63 |
64 |
shift_jis Member List
65 |
66 |
67 | 68 |

This is the complete list of members for shift_jis, including all inherited members.

69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 |
find()shift_jisvirtual
get_is_big_endian()jis_enc
get_min_len()jis_enc
get_use_jisx0213()jis_enc
instream (defined in jis_enc)jis_encprotected
is_big_endianjis_encprotected
is_jisx0201_printable(const uint8_t *c)shift_jisstatic
is_jisx0208(const uint8_t *c_h, const uint8_t *c_l)shift_jisstatic
is_jisx0213(const uint8_t *c_h, const uint8_t *c_l)shift_jisstatic
jis_enc(std::istream *instream) (defined in jis_enc)jis_enc
min_lenjis_encprotected
set_is_big_endian(bool is_big_endian)jis_enc
set_min_len(size_t min_len)jis_enc
set_use_jisx0213(bool use_jisx0213)jis_enc
shift_jis(std::istream *instream) (defined in shift_jis)shift_jisinline
use_jisx0213jis_encprotected
~jis_enc() (defined in jis_enc)jis_encvirtual
~shift_jis() (defined in shift_jis)shift_jis
89 | 90 | 95 | 96 | 97 | -------------------------------------------------------------------------------- /docs/html/classshift__jis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drojaazu/jstrings/bdeef30593790062d99d8b64d0b80e7b69f0b08b/docs/html/classshift__jis.png -------------------------------------------------------------------------------- /docs/html/closed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drojaazu/jstrings/bdeef30593790062d99d8b64d0b80e7b69f0b08b/docs/html/closed.png -------------------------------------------------------------------------------- /docs/html/dir_68267d1309a1af8e8297ef4c3efbcdba.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | jstrings: src Directory Reference 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 |
19 |
20 | 21 | 22 | 23 | 29 | 30 | 31 |
24 |
jstrings 25 |  1 26 |
27 |
A tool for finding JIS-based character strings in binary streams
28 |
32 |
33 | 34 | 35 | 38 | 39 | 40 | 46 | 47 | 48 |
52 |
53 | 54 | 55 |
56 | 59 |
60 | 61 | 65 |
66 |
67 |
68 |
src Directory Reference
69 |
70 |
71 |
72 | 73 | 78 | 79 | 80 | -------------------------------------------------------------------------------- /docs/html/dir_d44c64559bbebec7f509842c48db8b23.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | jstrings: include Directory Reference 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 |
19 |
20 | 21 | 22 | 23 | 29 | 30 | 31 |
24 |
jstrings 25 |  1 26 |
27 |
A tool for finding JIS-based character strings in binary streams
28 |
32 |
33 | 34 | 35 | 38 | 39 | 40 | 46 | 47 | 48 |
52 |
53 | 54 | 55 |
56 | 59 |
60 | 61 | 65 |
66 |
67 |
68 |
include Directory Reference
69 |
70 |
71 |
72 | 73 | 78 | 79 | 80 | -------------------------------------------------------------------------------- /docs/html/doc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drojaazu/jstrings/bdeef30593790062d99d8b64d0b80e7b69f0b08b/docs/html/doc.png -------------------------------------------------------------------------------- /docs/html/doxygen.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drojaazu/jstrings/bdeef30593790062d99d8b64d0b80e7b69f0b08b/docs/html/doxygen.png -------------------------------------------------------------------------------- /docs/html/dynsections.js: -------------------------------------------------------------------------------- 1 | function toggleVisibility(linkObj) 2 | { 3 | var base = $(linkObj).attr('id'); 4 | var summary = $('#'+base+'-summary'); 5 | var content = $('#'+base+'-content'); 6 | var trigger = $('#'+base+'-trigger'); 7 | var src=$(trigger).attr('src'); 8 | if (content.is(':visible')===true) { 9 | content.hide(); 10 | summary.show(); 11 | $(linkObj).addClass('closed').removeClass('opened'); 12 | $(trigger).attr('src',src.substring(0,src.length-8)+'closed.png'); 13 | } else { 14 | content.show(); 15 | summary.hide(); 16 | $(linkObj).removeClass('closed').addClass('opened'); 17 | $(trigger).attr('src',src.substring(0,src.length-10)+'open.png'); 18 | } 19 | return false; 20 | } 21 | 22 | function updateStripes() 23 | { 24 | $('table.directory tr'). 25 | removeClass('even').filter(':visible:even').addClass('even'); 26 | } 27 | 28 | function toggleLevel(level) 29 | { 30 | $('table.directory tr').each(function() { 31 | var l = this.id.split('_').length-1; 32 | var i = $('#img'+this.id.substring(3)); 33 | var a = $('#arr'+this.id.substring(3)); 34 | if (l 2 | 3 | 4 | 5 | 6 | 7 | 8 | jstrings: File List 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 |
19 |
20 | 21 | 22 | 23 | 29 | 30 | 31 |
24 |
jstrings 25 |  1 26 |
27 |
A tool for finding JIS-based character strings in binary streams
28 |
32 |
33 | 34 | 35 | 38 | 39 | 40 | 46 | 47 |
48 | 49 |
53 |
54 | 55 | 56 |
57 | 60 |
61 | 62 |
63 |
64 |
File List
65 |
66 |
67 |
Here is a list of all documented files with brief descriptions:
68 |
[detail level 12]
69 | 70 | 71 | 72 | 73 |
  include
 jis_enc.h
 main.h
 shift_jis.h
74 |
75 |
76 | 77 | 82 | 83 | 84 | -------------------------------------------------------------------------------- /docs/html/folderclosed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drojaazu/jstrings/bdeef30593790062d99d8b64d0b80e7b69f0b08b/docs/html/folderclosed.png -------------------------------------------------------------------------------- /docs/html/folderopen.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drojaazu/jstrings/bdeef30593790062d99d8b64d0b80e7b69f0b08b/docs/html/folderopen.png -------------------------------------------------------------------------------- /docs/html/functions.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | jstrings: Class Members 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 |
19 |
20 | 21 | 22 | 23 | 29 | 30 | 31 |
24 |
jstrings 25 |  1 26 |
27 |
A tool for finding JIS-based character strings in binary streams
28 |
32 |
33 | 34 | 35 | 38 | 39 | 40 | 46 | 47 |
48 | 49 |
53 |
54 | 55 | 56 |
57 | 60 |
61 | 62 |
63 |
Here is a list of all documented class members with links to the class documentation for each member:
111 |
112 | 113 | 118 | 119 | 120 | -------------------------------------------------------------------------------- /docs/html/functions_func.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | jstrings: Class Members - Functions 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 |
19 |
20 | 21 | 22 | 23 | 29 | 30 | 31 |
24 |
jstrings 25 |  1 26 |
27 |
A tool for finding JIS-based character strings in binary streams
28 |
32 |
33 | 34 | 35 | 38 | 39 | 40 | 46 | 47 |
48 | 49 |
53 |
54 | 55 | 56 |
57 | 60 |
61 | 62 |
63 |   96 |
97 | 98 | 103 | 104 | 105 | -------------------------------------------------------------------------------- /docs/html/functions_vars.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | jstrings: Class Members - Variables 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 |
19 |
20 | 21 | 22 | 23 | 29 | 30 | 31 |
24 |
jstrings 25 |  1 26 |
27 |
A tool for finding JIS-based character strings in binary streams
28 |
32 |
33 | 34 | 35 | 38 | 39 | 40 | 46 | 47 |
48 | 49 |
53 |
54 | 55 | 56 |
57 | 60 |
61 | 62 |
63 |   80 |
81 | 82 | 87 | 88 | 89 | -------------------------------------------------------------------------------- /docs/html/hierarchy.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | jstrings: Class Hierarchy 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 |
19 |
20 | 21 | 22 | 23 | 29 | 30 | 31 |
24 |
jstrings 25 |  1 26 |
27 |
A tool for finding JIS-based character strings in binary streams
28 |
32 |
33 | 34 | 35 | 38 | 39 | 40 | 46 | 47 |
48 | 49 |
53 |
54 | 55 | 56 |
57 | 60 |
61 | 62 |
63 |
64 |
Class Hierarchy
65 |
66 |
67 |
This inheritance list is sorted roughly, but not completely, alphabetically:
68 |
[detail level 12]
69 | 70 | 71 | 72 |
 Cfound_stringPOD structure for containing a found string
 Cjis_encAbstract class for JIS based encoding classes
 Cshift_jisFind Shift-JIS encoded strings in a byte stream
73 |
74 |
75 | 76 | 81 | 82 | 83 | -------------------------------------------------------------------------------- /docs/html/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | jstrings: jstrings 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 |
19 |
20 | 21 | 22 | 23 | 29 | 30 | 31 |
24 |
jstrings 25 |  1 26 |
27 |
A tool for finding JIS-based character strings in binary streams
28 |
32 |
33 | 34 | 35 | 38 | 39 | 40 | 46 | 47 |
48 | 49 |
53 |
54 | 55 | 56 |
57 | 60 |
61 | 62 |
63 |
64 |
jstrings
65 |
66 |
67 |

A tool for finding JIS-based Japanese characters in binary data.

68 |

Usage

69 |

jstrings [options] [input]

70 |

Input can be a filename or data from stdin.

71 |

Options

72 |

-m number

73 |

Set minimum number of characters to match as a valid string. Default: 10.

74 |

-e encoding

75 |

Specify the encoding to use. Currently, the only valid value is "shift-jis". Default: shift-jis

76 |

-l

77 |

Use little-endian order for multibyte characters

78 |

-jisx0213

79 |

Use JIS X 0213 character set instead of JIS X 0208 for double byte characters

80 |

Notes

81 |

Data is output in its original encoding without any conversion. Other tools, such as iconv, can do conversion to something more useful (such as UTF8). For example:

82 |

jstrings file.bin | iconv -f SHIFT-JIS -t UTF-8 -c

83 |

To Do

84 |
    85 |
  • Add support for other JIS encodings: CP932, EUC
  • 86 |
  • Add support for JIS X 0212 for non-SJIS encodings (only EUC?)
  • 87 |
  • Add option to only return strings with double-byte characters present
  • 88 |
89 |
90 | 91 | 96 | 97 | 98 | -------------------------------------------------------------------------------- /docs/html/jis__enc_8h_source.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | jstrings: include/jis_enc.h Source File 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 |
19 |
20 | 21 | 22 | 23 | 29 | 30 | 31 |
24 |
jstrings 25 |  1 26 |
27 |
A tool for finding JIS-based character strings in binary streams
28 |
32 |
33 | 34 | 35 | 38 | 39 | 40 | 46 | 47 | 48 |
52 |
53 | 54 | 55 |
56 | 59 |
60 | 61 | 65 |
66 |
67 |
68 |
jis_enc.h
69 |
70 |
71 |
1 
7 #ifndef JIS_ENC_H
8 #define JIS_ENC_H
9 #include <iostream>
10 #include <vector>
11 
15 struct found_string {
19  off_t address;
23  std::vector<uint8_t> data;
24 };
25 
29 class jis_enc {
30 protected:
31  std::istream* instream;
35  size_t min_len = 10;
39  bool is_big_endian = true;
43  bool use_jisx0213 = false;
44 
45 public:
46  jis_enc(std::istream* instream);
47  virtual ~jis_enc();
51  virtual std::vector<found_string>* find() = 0;
55  void set_min_len(size_t min_len);
59  size_t get_min_len();
63  void set_is_big_endian(bool is_big_endian);
67  bool get_is_big_endian();
71  void set_use_jisx0213(bool use_jisx0213);
75  bool get_use_jisx0213();
76 };
77 
78 #endif // JIS_ENC_H
std::vector< uint8_t > data
The extracted string data.
Definition: jis_enc.h:23
72 |
off_t address
The offset of the beginning of the found string relative to the start of the stream.
Definition: jis_enc.h:19
73 |
Abstract class for JIS based encoding classes.
Definition: jis_enc.h:29
74 |
POD structure for containing a found string.
Definition: jis_enc.h:15
75 |
76 | 77 | 82 | 83 | 84 | -------------------------------------------------------------------------------- /docs/html/main_8h_source.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | jstrings: include/main.h Source File 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 |
19 |
20 | 21 | 22 | 23 | 29 | 30 | 31 |
24 |
jstrings 25 |  1 26 |
27 |
A tool for finding JIS-based character strings in binary streams
28 |
32 |
33 | 34 | 35 | 38 | 39 | 40 | 46 | 47 | 48 |
52 |
53 | 54 | 55 |
56 | 59 |
60 | 61 | 65 |
66 |
67 |
68 |
main.h
69 |
70 |
71 |
1 
13 enum encodings { shift_jis_enc, cp932_enc, euc_enc };
14 
15 int process_args(int argc, char **argv);
72 | 73 | 78 | 79 | 80 | -------------------------------------------------------------------------------- /docs/html/menu.js: -------------------------------------------------------------------------------- 1 | function initMenu(relPath,searchEnabled,serverSide,searchPage,search) { 2 | function makeTree(data,relPath) { 3 | var result=''; 4 | if ('children' in data) { 5 | result+=''; 12 | } 13 | return result; 14 | } 15 | 16 | $('#main-nav').append(makeTree(menudata,relPath)); 17 | $('#main-nav').children(':first').addClass('sm sm-dox').attr('id','main-menu'); 18 | if (searchEnabled) { 19 | if (serverSide) { 20 | $('#main-menu').append('
  • '); 21 | } else { 22 | $('#main-menu').append('
  • '); 23 | } 24 | } 25 | $('#main-menu').smartmenus(); 26 | } 27 | -------------------------------------------------------------------------------- /docs/html/menudata.js: -------------------------------------------------------------------------------- 1 | var menudata={children:[ 2 | {text:"Main Page",url:"index.html"}, 3 | {text:"Classes",url:"annotated.html",children:[ 4 | {text:"Class List",url:"annotated.html"}, 5 | {text:"Class Index",url:"classes.html"}, 6 | {text:"Class Hierarchy",url:"hierarchy.html"}, 7 | {text:"Class Members",url:"functions.html",children:[ 8 | {text:"All",url:"functions.html"}, 9 | {text:"Functions",url:"functions_func.html"}, 10 | {text:"Variables",url:"functions_vars.html"}]}]}, 11 | {text:"Files",url:"files.html",children:[ 12 | {text:"File List",url:"files.html"}]}]} 13 | -------------------------------------------------------------------------------- /docs/html/nav_f.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drojaazu/jstrings/bdeef30593790062d99d8b64d0b80e7b69f0b08b/docs/html/nav_f.png -------------------------------------------------------------------------------- /docs/html/nav_g.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drojaazu/jstrings/bdeef30593790062d99d8b64d0b80e7b69f0b08b/docs/html/nav_g.png -------------------------------------------------------------------------------- /docs/html/nav_h.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drojaazu/jstrings/bdeef30593790062d99d8b64d0b80e7b69f0b08b/docs/html/nav_h.png -------------------------------------------------------------------------------- /docs/html/open.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/drojaazu/jstrings/bdeef30593790062d99d8b64d0b80e7b69f0b08b/docs/html/open.png -------------------------------------------------------------------------------- /docs/html/search/all_0.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 |
    11 |
    Loading...
    12 |
    13 | 16 |
    Searching...
    17 |
    No Matches
    18 | 24 |
    25 | 26 | 27 | -------------------------------------------------------------------------------- /docs/html/search/all_0.js: -------------------------------------------------------------------------------- 1 | var searchData= 2 | [ 3 | ['address',['address',['../structfound__string.html#abe22b9db7b758858c8615bcce8c709eb',1,'found_string']]] 4 | ]; 5 | -------------------------------------------------------------------------------- /docs/html/search/all_1.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 |
    11 |
    Loading...
    12 |
    13 | 16 |
    Searching...
    17 |
    No Matches
    18 | 24 |
    25 | 26 | 27 | -------------------------------------------------------------------------------- /docs/html/search/all_1.js: -------------------------------------------------------------------------------- 1 | var searchData= 2 | [ 3 | ['data',['data',['../structfound__string.html#a0c6e7a1688668189558667ac6eafa1bd',1,'found_string']]] 4 | ]; 5 | -------------------------------------------------------------------------------- /docs/html/search/all_2.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 |
    11 |
    Loading...
    12 |
    13 | 16 |
    Searching...
    17 |
    No Matches
    18 | 24 |
    25 | 26 | 27 | -------------------------------------------------------------------------------- /docs/html/search/all_2.js: -------------------------------------------------------------------------------- 1 | var searchData= 2 | [ 3 | ['find',['find',['../classjis__enc.html#ada0cfd52a1ec8ee216138e2a72588a0e',1,'jis_enc::find()'],['../classshift__jis.html#ae0b748a6775bdc79d2166dfde3c6db0a',1,'shift_jis::find()']]], 4 | ['found_5fstring',['found_string',['../structfound__string.html',1,'']]] 5 | ]; 6 | -------------------------------------------------------------------------------- /docs/html/search/all_3.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 |
    11 |
    Loading...
    12 |
    13 | 16 |
    Searching...
    17 |
    No Matches
    18 | 24 |
    25 | 26 | 27 | -------------------------------------------------------------------------------- /docs/html/search/all_3.js: -------------------------------------------------------------------------------- 1 | var searchData= 2 | [ 3 | ['get_5fis_5fbig_5fendian',['get_is_big_endian',['../classjis__enc.html#a074c250e987744a27aa03db866d2cfdb',1,'jis_enc']]], 4 | ['get_5fmin_5flen',['get_min_len',['../classjis__enc.html#ac32e7ab8d1376aca07d17748fd22fb62',1,'jis_enc']]], 5 | ['get_5fuse_5fjisx0213',['get_use_jisx0213',['../classjis__enc.html#afa01565401216a5bef8fc5f7346ac7dc',1,'jis_enc']]] 6 | ]; 7 | -------------------------------------------------------------------------------- /docs/html/search/all_4.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 |
    11 |
    Loading...
    12 |
    13 | 16 |
    Searching...
    17 |
    No Matches
    18 | 24 |
    25 | 26 | 27 | -------------------------------------------------------------------------------- /docs/html/search/all_4.js: -------------------------------------------------------------------------------- 1 | var searchData= 2 | [ 3 | ['is_5fbig_5fendian',['is_big_endian',['../classjis__enc.html#a938547156f97dfd2d525a5205744f4ca',1,'jis_enc']]], 4 | ['is_5fjisx0201_5fprintable',['is_jisx0201_printable',['../classshift__jis.html#abe70f02f23af28e59dde449c3a21be68',1,'shift_jis']]], 5 | ['is_5fjisx0208',['is_jisx0208',['../classshift__jis.html#ab2f235118a565abbf6ca9bd831ffa4ea',1,'shift_jis']]], 6 | ['is_5fjisx0213',['is_jisx0213',['../classshift__jis.html#ae1d0ad63b01a645c1ecf7f69660b8d41',1,'shift_jis']]] 7 | ]; 8 | -------------------------------------------------------------------------------- /docs/html/search/all_5.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 |
    11 |
    Loading...
    12 |
    13 | 16 |
    Searching...
    17 |
    No Matches
    18 | 24 |
    25 | 26 | 27 | -------------------------------------------------------------------------------- /docs/html/search/all_5.js: -------------------------------------------------------------------------------- 1 | var searchData= 2 | [ 3 | ['jstrings',['jstrings',['../index.html',1,'']]], 4 | ['jis_5fenc',['jis_enc',['../classjis__enc.html',1,'']]] 5 | ]; 6 | -------------------------------------------------------------------------------- /docs/html/search/all_6.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 |
    11 |
    Loading...
    12 |
    13 | 16 |
    Searching...
    17 |
    No Matches
    18 | 24 |
    25 | 26 | 27 | -------------------------------------------------------------------------------- /docs/html/search/all_6.js: -------------------------------------------------------------------------------- 1 | var searchData= 2 | [ 3 | ['min_5flen',['min_len',['../classjis__enc.html#a61a76eed4c65b7666d5354f89dfae796',1,'jis_enc']]] 4 | ]; 5 | -------------------------------------------------------------------------------- /docs/html/search/all_7.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 |
    11 |
    Loading...
    12 |
    13 | 16 |
    Searching...
    17 |
    No Matches
    18 | 24 |
    25 | 26 | 27 | -------------------------------------------------------------------------------- /docs/html/search/all_7.js: -------------------------------------------------------------------------------- 1 | var searchData= 2 | [ 3 | ['set_5fis_5fbig_5fendian',['set_is_big_endian',['../classjis__enc.html#a22a855a3769ee68e798d41548c58678b',1,'jis_enc']]], 4 | ['set_5fmin_5flen',['set_min_len',['../classjis__enc.html#a85e34f8f9960d895caf72c616046f20d',1,'jis_enc']]], 5 | ['set_5fuse_5fjisx0213',['set_use_jisx0213',['../classjis__enc.html#ad93d9e0a295fe97494a9b1dec6c9b926',1,'jis_enc']]], 6 | ['shift_5fjis',['shift_jis',['../classshift__jis.html',1,'']]] 7 | ]; 8 | -------------------------------------------------------------------------------- /docs/html/search/all_8.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 |
    11 |
    Loading...
    12 |
    13 | 16 |
    Searching...
    17 |
    No Matches
    18 | 24 |
    25 | 26 | 27 | -------------------------------------------------------------------------------- /docs/html/search/all_8.js: -------------------------------------------------------------------------------- 1 | var searchData= 2 | [ 3 | ['use_5fjisx0213',['use_jisx0213',['../classjis__enc.html#a9dbb086c214f28bf82b9304a3f027d48',1,'jis_enc']]] 4 | ]; 5 | -------------------------------------------------------------------------------- /docs/html/search/classes_0.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 |
    11 |
    Loading...
    12 |
    13 | 16 |
    Searching...
    17 |
    No Matches
    18 | 24 |
    25 | 26 | 27 | -------------------------------------------------------------------------------- /docs/html/search/classes_0.js: -------------------------------------------------------------------------------- 1 | var searchData= 2 | [ 3 | ['found_5fstring',['found_string',['../structfound__string.html',1,'']]] 4 | ]; 5 | -------------------------------------------------------------------------------- /docs/html/search/classes_1.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 |
    11 |
    Loading...
    12 |
    13 | 16 |
    Searching...
    17 |
    No Matches
    18 | 24 |
    25 | 26 | 27 | -------------------------------------------------------------------------------- /docs/html/search/classes_1.js: -------------------------------------------------------------------------------- 1 | var searchData= 2 | [ 3 | ['jis_5fenc',['jis_enc',['../classjis__enc.html',1,'']]] 4 | ]; 5 | -------------------------------------------------------------------------------- /docs/html/search/classes_2.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 |
    11 |
    Loading...
    12 |
    13 | 16 |
    Searching...
    17 |
    No Matches
    18 | 24 |
    25 | 26 | 27 | -------------------------------------------------------------------------------- /docs/html/search/classes_2.js: -------------------------------------------------------------------------------- 1 | var searchData= 2 | [ 3 | ['shift_5fjis',['shift_jis',['../classshift__jis.html',1,'']]] 4 | ]; 5 | -------------------------------------------------------------------------------- /docs/html/search/close.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drojaazu/jstrings/bdeef30593790062d99d8b64d0b80e7b69f0b08b/docs/html/search/close.png -------------------------------------------------------------------------------- /docs/html/search/functions_0.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 |
    11 |
    Loading...
    12 |
    13 | 16 |
    Searching...
    17 |
    No Matches
    18 | 24 |
    25 | 26 | 27 | -------------------------------------------------------------------------------- /docs/html/search/functions_0.js: -------------------------------------------------------------------------------- 1 | var searchData= 2 | [ 3 | ['find',['find',['../classjis__enc.html#ada0cfd52a1ec8ee216138e2a72588a0e',1,'jis_enc::find()'],['../classshift__jis.html#ae0b748a6775bdc79d2166dfde3c6db0a',1,'shift_jis::find()']]] 4 | ]; 5 | -------------------------------------------------------------------------------- /docs/html/search/functions_1.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 |
    11 |
    Loading...
    12 |
    13 | 16 |
    Searching...
    17 |
    No Matches
    18 | 24 |
    25 | 26 | 27 | -------------------------------------------------------------------------------- /docs/html/search/functions_1.js: -------------------------------------------------------------------------------- 1 | var searchData= 2 | [ 3 | ['get_5fis_5fbig_5fendian',['get_is_big_endian',['../classjis__enc.html#a074c250e987744a27aa03db866d2cfdb',1,'jis_enc']]], 4 | ['get_5fmin_5flen',['get_min_len',['../classjis__enc.html#ac32e7ab8d1376aca07d17748fd22fb62',1,'jis_enc']]], 5 | ['get_5fuse_5fjisx0213',['get_use_jisx0213',['../classjis__enc.html#afa01565401216a5bef8fc5f7346ac7dc',1,'jis_enc']]] 6 | ]; 7 | -------------------------------------------------------------------------------- /docs/html/search/functions_2.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 |
    11 |
    Loading...
    12 |
    13 | 16 |
    Searching...
    17 |
    No Matches
    18 | 24 |
    25 | 26 | 27 | -------------------------------------------------------------------------------- /docs/html/search/functions_2.js: -------------------------------------------------------------------------------- 1 | var searchData= 2 | [ 3 | ['is_5fjisx0201_5fprintable',['is_jisx0201_printable',['../classshift__jis.html#abe70f02f23af28e59dde449c3a21be68',1,'shift_jis']]], 4 | ['is_5fjisx0208',['is_jisx0208',['../classshift__jis.html#ab2f235118a565abbf6ca9bd831ffa4ea',1,'shift_jis']]], 5 | ['is_5fjisx0213',['is_jisx0213',['../classshift__jis.html#ae1d0ad63b01a645c1ecf7f69660b8d41',1,'shift_jis']]] 6 | ]; 7 | -------------------------------------------------------------------------------- /docs/html/search/functions_3.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 |
    11 |
    Loading...
    12 |
    13 | 16 |
    Searching...
    17 |
    No Matches
    18 | 24 |
    25 | 26 | 27 | -------------------------------------------------------------------------------- /docs/html/search/functions_3.js: -------------------------------------------------------------------------------- 1 | var searchData= 2 | [ 3 | ['set_5fis_5fbig_5fendian',['set_is_big_endian',['../classjis__enc.html#a22a855a3769ee68e798d41548c58678b',1,'jis_enc']]], 4 | ['set_5fmin_5flen',['set_min_len',['../classjis__enc.html#a85e34f8f9960d895caf72c616046f20d',1,'jis_enc']]], 5 | ['set_5fuse_5fjisx0213',['set_use_jisx0213',['../classjis__enc.html#ad93d9e0a295fe97494a9b1dec6c9b926',1,'jis_enc']]] 6 | ]; 7 | -------------------------------------------------------------------------------- /docs/html/search/mag_sel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drojaazu/jstrings/bdeef30593790062d99d8b64d0b80e7b69f0b08b/docs/html/search/mag_sel.png -------------------------------------------------------------------------------- /docs/html/search/nomatches.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
    9 |
    No Matches
    10 |
    11 | 12 | 13 | -------------------------------------------------------------------------------- /docs/html/search/pages_0.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 |
    11 |
    Loading...
    12 |
    13 | 16 |
    Searching...
    17 |
    No Matches
    18 | 24 |
    25 | 26 | 27 | -------------------------------------------------------------------------------- /docs/html/search/pages_0.js: -------------------------------------------------------------------------------- 1 | var searchData= 2 | [ 3 | ['jstrings',['jstrings',['../index.html',1,'']]] 4 | ]; 5 | -------------------------------------------------------------------------------- /docs/html/search/search.css: -------------------------------------------------------------------------------- 1 | /*---------------- Search Box */ 2 | 3 | #FSearchBox { 4 | float: left; 5 | } 6 | 7 | #MSearchBox { 8 | white-space : nowrap; 9 | float: none; 10 | margin-top: 8px; 11 | right: 0px; 12 | width: 170px; 13 | height: 24px; 14 | z-index: 102; 15 | } 16 | 17 | #MSearchBox .left 18 | { 19 | display:block; 20 | position:absolute; 21 | left:10px; 22 | width:20px; 23 | height:19px; 24 | background:url('search_l.png') no-repeat; 25 | background-position:right; 26 | } 27 | 28 | #MSearchSelect { 29 | display:block; 30 | position:absolute; 31 | width:20px; 32 | height:19px; 33 | } 34 | 35 | .left #MSearchSelect { 36 | left:4px; 37 | } 38 | 39 | .right #MSearchSelect { 40 | right:5px; 41 | } 42 | 43 | #MSearchField { 44 | display:block; 45 | position:absolute; 46 | height:19px; 47 | background:url('search_m.png') repeat-x; 48 | border:none; 49 | width:115px; 50 | margin-left:20px; 51 | padding-left:4px; 52 | color: #909090; 53 | outline: none; 54 | font: 9pt Arial, Verdana, sans-serif; 55 | -webkit-border-radius: 0px; 56 | } 57 | 58 | #FSearchBox #MSearchField { 59 | margin-left:15px; 60 | } 61 | 62 | #MSearchBox .right { 63 | display:block; 64 | position:absolute; 65 | right:10px; 66 | top:8px; 67 | width:20px; 68 | height:19px; 69 | background:url('search_r.png') no-repeat; 70 | background-position:left; 71 | } 72 | 73 | #MSearchClose { 74 | display: none; 75 | position: absolute; 76 | top: 4px; 77 | background : none; 78 | border: none; 79 | margin: 0px 4px 0px 0px; 80 | padding: 0px 0px; 
81 | outline: none; 82 | } 83 | 84 | .left #MSearchClose { 85 | left: 6px; 86 | } 87 | 88 | .right #MSearchClose { 89 | right: 2px; 90 | } 91 | 92 | .MSearchBoxActive #MSearchField { 93 | color: #000000; 94 | } 95 | 96 | /*---------------- Search filter selection */ 97 | 98 | #MSearchSelectWindow { 99 | display: none; 100 | position: absolute; 101 | left: 0; top: 0; 102 | border: 1px solid #90A5CE; 103 | background-color: #F9FAFC; 104 | z-index: 10001; 105 | padding-top: 4px; 106 | padding-bottom: 4px; 107 | -moz-border-radius: 4px; 108 | -webkit-border-top-left-radius: 4px; 109 | -webkit-border-top-right-radius: 4px; 110 | -webkit-border-bottom-left-radius: 4px; 111 | -webkit-border-bottom-right-radius: 4px; 112 | -webkit-box-shadow: 5px 5px 5px rgba(0, 0, 0, 0.15); 113 | } 114 | 115 | .SelectItem { 116 | font: 8pt Arial, Verdana, sans-serif; 117 | padding-left: 2px; 118 | padding-right: 12px; 119 | border: 0px; 120 | } 121 | 122 | span.SelectionMark { 123 | margin-right: 4px; 124 | font-family: monospace; 125 | outline-style: none; 126 | text-decoration: none; 127 | } 128 | 129 | a.SelectItem { 130 | display: block; 131 | outline-style: none; 132 | color: #000000; 133 | text-decoration: none; 134 | padding-left: 6px; 135 | padding-right: 12px; 136 | } 137 | 138 | a.SelectItem:focus, 139 | a.SelectItem:active { 140 | color: #000000; 141 | outline-style: none; 142 | text-decoration: none; 143 | } 144 | 145 | a.SelectItem:hover { 146 | color: #FFFFFF; 147 | background-color: #3D578C; 148 | outline-style: none; 149 | text-decoration: none; 150 | cursor: pointer; 151 | display: block; 152 | } 153 | 154 | /*---------------- Search results window */ 155 | 156 | iframe#MSearchResults { 157 | width: 60ex; 158 | height: 15em; 159 | } 160 | 161 | #MSearchResultsWindow { 162 | display: none; 163 | position: absolute; 164 | left: 0; top: 0; 165 | border: 1px solid #000; 166 | background-color: #EEF1F7; 167 | z-index:10000; 168 | } 169 | 170 | /* 
----------------------------------- */ 171 | 172 | 173 | #SRIndex { 174 | clear:both; 175 | padding-bottom: 15px; 176 | } 177 | 178 | .SREntry { 179 | font-size: 10pt; 180 | padding-left: 1ex; 181 | } 182 | 183 | .SRPage .SREntry { 184 | font-size: 8pt; 185 | padding: 1px 5px; 186 | } 187 | 188 | body.SRPage { 189 | margin: 5px 2px; 190 | } 191 | 192 | .SRChildren { 193 | padding-left: 3ex; padding-bottom: .5em 194 | } 195 | 196 | .SRPage .SRChildren { 197 | display: none; 198 | } 199 | 200 | .SRSymbol { 201 | font-weight: bold; 202 | color: #425E97; 203 | font-family: Arial, Verdana, sans-serif; 204 | text-decoration: none; 205 | outline: none; 206 | } 207 | 208 | a.SRScope { 209 | display: block; 210 | color: #425E97; 211 | font-family: Arial, Verdana, sans-serif; 212 | text-decoration: none; 213 | outline: none; 214 | } 215 | 216 | a.SRSymbol:focus, a.SRSymbol:active, 217 | a.SRScope:focus, a.SRScope:active { 218 | text-decoration: underline; 219 | } 220 | 221 | span.SRScope { 222 | padding-left: 4px; 223 | } 224 | 225 | .SRPage .SRStatus { 226 | padding: 2px 5px; 227 | font-size: 8pt; 228 | font-style: italic; 229 | } 230 | 231 | .SRResult { 232 | display: none; 233 | } 234 | 235 | DIV.searchresults { 236 | margin-left: 10px; 237 | margin-right: 10px; 238 | } 239 | 240 | /*---------------- External search page results */ 241 | 242 | .searchresult { 243 | background-color: #F0F3F8; 244 | } 245 | 246 | .pages b { 247 | color: white; 248 | padding: 5px 5px 3px 5px; 249 | background-image: url("../tab_a.png"); 250 | background-repeat: repeat-x; 251 | text-shadow: 0 1px 1px #000000; 252 | } 253 | 254 | .pages { 255 | line-height: 17px; 256 | margin-left: 4px; 257 | text-decoration: none; 258 | } 259 | 260 | .hl { 261 | font-weight: bold; 262 | } 263 | 264 | #searchresults { 265 | margin-bottom: 20px; 266 | } 267 | 268 | .searchpages { 269 | margin-top: 10px; 270 | } 271 | 272 | -------------------------------------------------------------------------------- 
/docs/html/search/search_l.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drojaazu/jstrings/bdeef30593790062d99d8b64d0b80e7b69f0b08b/docs/html/search/search_l.png -------------------------------------------------------------------------------- /docs/html/search/search_m.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drojaazu/jstrings/bdeef30593790062d99d8b64d0b80e7b69f0b08b/docs/html/search/search_m.png -------------------------------------------------------------------------------- /docs/html/search/search_r.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drojaazu/jstrings/bdeef30593790062d99d8b64d0b80e7b69f0b08b/docs/html/search/search_r.png -------------------------------------------------------------------------------- /docs/html/search/searchdata.js: -------------------------------------------------------------------------------- 1 | var indexSectionsWithContent = 2 | { 3 | 0: "adfgijmsu", 4 | 1: "fjs", 5 | 2: "fgis", 6 | 3: "adimu", 7 | 4: "j" 8 | }; 9 | 10 | var indexSectionNames = 11 | { 12 | 0: "all", 13 | 1: "classes", 14 | 2: "functions", 15 | 3: "variables", 16 | 4: "pages" 17 | }; 18 | 19 | var indexSectionLabels = 20 | { 21 | 0: "All", 22 | 1: "Classes", 23 | 2: "Functions", 24 | 3: "Variables", 25 | 4: "Pages" 26 | }; 27 | 28 | -------------------------------------------------------------------------------- /docs/html/search/variables_0.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 |
    11 |
    Loading...
    12 |
    13 | 16 |
    Searching...
    17 |
    No Matches
    18 | 24 |
    25 | 26 | 27 | -------------------------------------------------------------------------------- /docs/html/search/variables_0.js: -------------------------------------------------------------------------------- 1 | var searchData= 2 | [ 3 | ['address',['address',['../structfound__string.html#abe22b9db7b758858c8615bcce8c709eb',1,'found_string']]] 4 | ]; 5 | -------------------------------------------------------------------------------- /docs/html/search/variables_1.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 |
    11 |
    Loading...
    12 |
    13 | 16 |
    Searching...
    17 |
    No Matches
    18 | 24 |
    25 | 26 | 27 | -------------------------------------------------------------------------------- /docs/html/search/variables_1.js: -------------------------------------------------------------------------------- 1 | var searchData= 2 | [ 3 | ['data',['data',['../structfound__string.html#a0c6e7a1688668189558667ac6eafa1bd',1,'found_string']]] 4 | ]; 5 | -------------------------------------------------------------------------------- /docs/html/search/variables_2.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 |
    11 |
    Loading...
    12 |
    13 | 16 |
    Searching...
    17 |
    No Matches
    18 | 24 |
    25 | 26 | 27 | -------------------------------------------------------------------------------- /docs/html/search/variables_2.js: -------------------------------------------------------------------------------- 1 | var searchData= 2 | [ 3 | ['is_5fbig_5fendian',['is_big_endian',['../classjis__enc.html#a938547156f97dfd2d525a5205744f4ca',1,'jis_enc']]] 4 | ]; 5 | -------------------------------------------------------------------------------- /docs/html/search/variables_3.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 |
    11 |
    Loading...
    12 |
    13 | 16 |
    Searching...
    17 |
    No Matches
    18 | 24 |
    25 | 26 | 27 | -------------------------------------------------------------------------------- /docs/html/search/variables_3.js: -------------------------------------------------------------------------------- 1 | var searchData= 2 | [ 3 | ['min_5flen',['min_len',['../classjis__enc.html#a61a76eed4c65b7666d5354f89dfae796',1,'jis_enc']]] 4 | ]; 5 | -------------------------------------------------------------------------------- /docs/html/search/variables_4.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 |
    11 |
    Loading...
    12 |
    13 | 16 |
    Searching...
    17 |
    No Matches
    18 | 24 |
    25 | 26 | 27 | -------------------------------------------------------------------------------- /docs/html/search/variables_4.js: -------------------------------------------------------------------------------- 1 | var searchData= 2 | [ 3 | ['use_5fjisx0213',['use_jisx0213',['../classjis__enc.html#a9dbb086c214f28bf82b9304a3f027d48',1,'jis_enc']]] 4 | ]; 5 | -------------------------------------------------------------------------------- /docs/html/shift__jis_8h_source.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | jstrings: include/shift_jis.h Source File 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 |
    19 |
    20 | 21 | 22 | 23 | 29 | 30 | 31 |
    24 |
    jstrings 25 |  1 26 |
    27 |
    A tool for finding JIS-based character strings in binary streams
    28 |
    32 |
    33 | 34 | 35 | 38 | 39 | 40 | 46 | 47 | 48 |
    52 |
    53 | 54 | 55 |
    56 | 59 |
    60 | 61 | 65 |
    66 |
    67 |
    68 |
    shift_jis.h
    69 |
    70 |
    71 |
    1 
    8 #ifndef SHIFT_JIS_H
    9 #define SHIFT_JIS_H
    10 #include <vector>
    11 #include <iostream>
    12 #include <stdint.h>
    13 #include "jis_enc.h"
    14 
    15 class shift_jis : public jis_enc {
    16  static bool is_byte2_valid(const uint8_t* c);
    17 
    18 public:
    19  shift_jis(std::istream* instream) : jis_enc(instream) {};
    20  ~shift_jis();
    21  std::vector<found_string>* find();
    28  static bool is_jisx0201_printable(const uint8_t* c);
    33  static bool is_jisx0208(const uint8_t* c_h, const uint8_t* c_l);
    38  static bool is_jisx0213(const uint8_t* c_h, const uint8_t* c_l);
    39 };
    40 
    41 #endif // SHIFT_JIS_H
    static bool is_jisx0201_printable(const uint8_t *c)
    Determines if a given byte is valid for JIS X 0201 and is printable.
    Definition: shift_jis.cpp:98
    72 |
    std::vector< found_string > * find()
    Performs the search of the data stream.
    Definition: shift_jis.cpp:25
    73 |
    static bool is_jisx0208(const uint8_t *c_h, const uint8_t *c_l)
    Determines if the given bytes make up a valid JIS X 0208 character.
    Definition: shift_jis.cpp:106
    74 |
    Abstract class for JIS based encoding classes.
    Definition: jis_enc.h:29
    75 |
    static bool is_jisx0213(const uint8_t *c_h, const uint8_t *c_l)
    Determines if the given bytes make up a valid JIS X 0213 character.
    Definition: shift_jis.cpp:172
    76 |
    Find Shift-JIS encoded strings in a byte stream.
    Definition: shift_jis.h:15
    77 |
    78 | 79 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /docs/html/splitbar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drojaazu/jstrings/bdeef30593790062d99d8b64d0b80e7b69f0b08b/docs/html/splitbar.png -------------------------------------------------------------------------------- /docs/html/structfound__string-members.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | jstrings: Member List 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 |
    19 |
    20 | 21 | 22 | 23 | 29 | 30 | 31 |
    24 |
    jstrings 25 |  1 26 |
    27 |
    A tool for finding JIS-based character strings in binary streams
    28 |
    32 |
    33 | 34 | 35 | 38 | 39 | 40 | 46 | 47 | 48 |
    52 |
    53 | 54 | 55 |
    56 | 59 |
    60 | 61 |
    62 |
    63 |
    64 |
    found_string Member List
    65 |
    66 |
    67 | 68 |

    This is the complete list of members for found_string, including all inherited members.

    69 | 70 | 71 | 72 |
    addressfound_string
    datafound_string
    73 | 74 | 79 | 80 | 81 | -------------------------------------------------------------------------------- /docs/html/structfound__string.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | jstrings: found_string Struct Reference 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 |
    19 |
    20 | 21 | 22 | 23 | 29 | 30 | 31 |
    24 |
    jstrings 25 |  1 26 |
    27 |
    A tool for finding JIS-based character strings in binary streams
    28 |
    32 |
    33 | 34 | 35 | 38 | 39 | 40 | 46 | 47 | 48 |
    52 |
    53 | 54 | 55 |
    56 | 59 |
    60 | 61 |
    62 |
    63 | 66 |
    67 |
    found_string Struct Reference
    68 |
    69 |
    70 | 71 |

    POD structure for containing a found string. 72 | More...

    73 | 74 |

    #include <jis_enc.h>

    75 | 76 | 78 | 80 | 81 | 82 | 84 | 85 | 86 |

    77 | Public Attributes

    79 | off_t address
     The offset of the beginning of the found string relative to the start of the stream.
     
    83 | std::vector< uint8_t > data
     The extracted string data.
     
    87 |

    Detailed Description

    88 |

    POD structure for containing a found string.

    89 |
    Author
    Damian Rogers (damia.nosp@m.n@su.nosp@m.dden-.nosp@m.desu.nosp@m..net)
    90 |
    Version
    1.0
    91 |
    Date
    2017.12.30
    92 | 93 |

    The documentation for this struct was generated from the following file: 96 |
    97 | 98 | 103 | 104 | 105 | -------------------------------------------------------------------------------- /docs/html/sync_off.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drojaazu/jstrings/bdeef30593790062d99d8b64d0b80e7b69f0b08b/docs/html/sync_off.png -------------------------------------------------------------------------------- /docs/html/sync_on.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drojaazu/jstrings/bdeef30593790062d99d8b64d0b80e7b69f0b08b/docs/html/sync_on.png -------------------------------------------------------------------------------- /docs/html/tab_a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drojaazu/jstrings/bdeef30593790062d99d8b64d0b80e7b69f0b08b/docs/html/tab_a.png -------------------------------------------------------------------------------- /docs/html/tab_b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drojaazu/jstrings/bdeef30593790062d99d8b64d0b80e7b69f0b08b/docs/html/tab_b.png -------------------------------------------------------------------------------- /docs/html/tab_h.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drojaazu/jstrings/bdeef30593790062d99d8b64d0b80e7b69f0b08b/docs/html/tab_h.png -------------------------------------------------------------------------------- /docs/html/tab_s.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drojaazu/jstrings/bdeef30593790062d99d8b64d0b80e7b69f0b08b/docs/html/tab_s.png -------------------------------------------------------------------------------- /docs/html/tabs.css: 
-------------------------------------------------------------------------------- 1 | .sm{position:relative;z-index:9999}.sm,.sm ul,.sm li{display:block;list-style:none;margin:0;padding:0;line-height:normal;direction:ltr;text-align:left;-webkit-tap-highlight-color:rgba(0,0,0,0)}.sm-rtl,.sm-rtl ul,.sm-rtl li{direction:rtl;text-align:right}.sm>li>h1,.sm>li>h2,.sm>li>h3,.sm>li>h4,.sm>li>h5,.sm>li>h6{margin:0;padding:0}.sm ul{display:none}.sm li,.sm a{position:relative}.sm a{display:block}.sm a.disabled{cursor:not-allowed}.sm:after{content:"\00a0";display:block;height:0;font:0/0 serif;clear:both;visibility:hidden;overflow:hidden}.sm,.sm *,.sm *:before,.sm *:after{-moz-box-sizing:border-box;-webkit-box-sizing:border-box;box-sizing:border-box}#doc-content{overflow:auto;display:block;padding:0;margin:0;-webkit-overflow-scrolling:touch}.sm-dox{background-image:url("tab_b.png")}.sm-dox a,.sm-dox a:focus,.sm-dox a:hover,.sm-dox a:active{padding:0 12px;padding-right:43px;font-family:"Lucida Grande","Geneva","Helvetica",Arial,sans-serif;font-size:13px;font-weight:bold;line-height:36px;text-decoration:none;text-shadow:0 1px 1px rgba(255,255,255,0.9);color:#283a5d;outline:0}.sm-dox a:hover{background-image:url("tab_a.png");background-repeat:repeat-x;color:white;text-shadow:0 1px 1px black}.sm-dox a.current{color:#d23600}.sm-dox a.disabled{color:#bbb}.sm-dox a span.sub-arrow{position:absolute;top:50%;margin-top:-14px;left:auto;right:3px;width:28px;height:28px;overflow:hidden;font:bold 12px/28px monospace!important;text-align:center;text-shadow:none;background:rgba(255,255,255,0.5);-moz-border-radius:5px;-webkit-border-radius:5px;border-radius:5px}.sm-dox a.highlighted span.sub-arrow:before{display:block;content:'-'}.sm-dox>li:first-child>a,.sm-dox>li:first-child>:not(ul) a{-moz-border-radius:5px 5px 0 0;-webkit-border-radius:5px;border-radius:5px 5px 0 0}.sm-dox>li:last-child>a,.sm-dox>li:last-child>*:not(ul) 
a,.sm-dox>li:last-child>ul,.sm-dox>li:last-child>ul>li:last-child>a,.sm-dox>li:last-child>ul>li:last-child>*:not(ul) a,.sm-dox>li:last-child>ul>li:last-child>ul,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>a,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>*:not(ul) a,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>a,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>*:not(ul) a,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>ul,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>a,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>*:not(ul) a,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>ul{-moz-border-radius:0 0 5px 5px;-webkit-border-radius:0;border-radius:0 0 5px 5px}.sm-dox>li:last-child>a.highlighted,.sm-dox>li:last-child>*:not(ul) a.highlighted,.sm-dox>li:last-child>ul>li:last-child>a.highlighted,.sm-dox>li:last-child>ul>li:last-child>*:not(ul) a.highlighted,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>a.highlighted,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>*:not(ul) a.highlighted,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>a.highlighted,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>*:not(ul) a.highlighted,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>a.highlighted,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>*:not(ul) a.highlighted{-moz-border-radius:0;-webkit-border-radius:0;border-radius:0}.sm-dox ul{background:rgba(162,162,162,0.1)}.sm-dox ul a,.sm-dox ul a:focus,.sm-dox ul a:hover,.sm-dox ul a:active{font-size:12px;border-left:8px solid 
transparent;line-height:36px;text-shadow:none;background-color:white;background-image:none}.sm-dox ul a:hover{background-image:url("tab_a.png");background-repeat:repeat-x;color:white;text-shadow:0 1px 1px black}.sm-dox ul ul a,.sm-dox ul ul a:hover,.sm-dox ul ul a:focus,.sm-dox ul ul a:active{border-left:16px solid transparent}.sm-dox ul ul ul a,.sm-dox ul ul ul a:hover,.sm-dox ul ul ul a:focus,.sm-dox ul ul ul a:active{border-left:24px solid transparent}.sm-dox ul ul ul ul a,.sm-dox ul ul ul ul a:hover,.sm-dox ul ul ul ul a:focus,.sm-dox ul ul ul ul a:active{border-left:32px solid transparent}.sm-dox ul ul ul ul ul a,.sm-dox ul ul ul ul ul a:hover,.sm-dox ul ul ul ul ul a:focus,.sm-dox ul ul ul ul ul a:active{border-left:40px solid transparent}@media(min-width:768px){.sm-dox ul{position:absolute;width:12em}.sm-dox li{float:left}.sm-dox.sm-rtl li{float:right}.sm-dox ul li,.sm-dox.sm-rtl ul li,.sm-dox.sm-vertical li{float:none}.sm-dox a{white-space:nowrap}.sm-dox ul a,.sm-dox.sm-vertical a{white-space:normal}.sm-dox .sm-nowrap>li>a,.sm-dox .sm-nowrap>li>:not(ul) a{white-space:nowrap}.sm-dox{padding:0 10px;background-image:url("tab_b.png");line-height:36px}.sm-dox a span.sub-arrow{top:50%;margin-top:-2px;right:12px;width:0;height:0;border-width:4px;border-style:solid dashed dashed dashed;border-color:#283a5d transparent transparent transparent;background:transparent;-moz-border-radius:0;-webkit-border-radius:0;border-radius:0}.sm-dox a,.sm-dox a:focus,.sm-dox a:active,.sm-dox a:hover,.sm-dox a.highlighted{padding:0 12px;background-image:url("tab_s.png");background-repeat:no-repeat;background-position:right;-moz-border-radius:0!important;-webkit-border-radius:0;border-radius:0!important}.sm-dox a:hover{background-image:url("tab_a.png");background-repeat:repeat-x;color:white;text-shadow:0 1px 1px black}.sm-dox a:hover span.sub-arrow{border-color:white transparent transparent transparent}.sm-dox a.has-submenu{padding-right:24px}.sm-dox 
li{border-top:0}.sm-dox>li>ul:before,.sm-dox>li>ul:after{content:'';position:absolute;top:-18px;left:30px;width:0;height:0;overflow:hidden;border-width:9px;border-style:dashed dashed solid dashed;border-color:transparent transparent #bbb transparent}.sm-dox>li>ul:after{top:-16px;left:31px;border-width:8px;border-color:transparent transparent #fff transparent}.sm-dox ul{border:1px solid #bbb;padding:5px 0;background:#fff;-moz-border-radius:5px!important;-webkit-border-radius:5px;border-radius:5px!important;-moz-box-shadow:0 5px 9px rgba(0,0,0,0.2);-webkit-box-shadow:0 5px 9px rgba(0,0,0,0.2);box-shadow:0 5px 9px rgba(0,0,0,0.2)}.sm-dox ul a span.sub-arrow{right:8px;top:50%;margin-top:-5px;border-width:5px;border-color:transparent transparent transparent #555;border-style:dashed dashed dashed solid}.sm-dox ul a,.sm-dox ul a:hover,.sm-dox ul a:focus,.sm-dox ul a:active,.sm-dox ul a.highlighted{color:#555;background-image:none;border:0!important;color:#555;background-image:none}.sm-dox ul a:hover{background-image:url("tab_a.png");background-repeat:repeat-x;color:white;text-shadow:0 1px 1px black}.sm-dox ul a:hover span.sub-arrow{border-color:transparent transparent transparent white}.sm-dox span.scroll-up,.sm-dox span.scroll-down{position:absolute;display:none;visibility:hidden;overflow:hidden;background:#fff;height:36px}.sm-dox span.scroll-up:hover,.sm-dox span.scroll-down:hover{background:#eee}.sm-dox span.scroll-up:hover span.scroll-up-arrow,.sm-dox span.scroll-up:hover span.scroll-down-arrow{border-color:transparent transparent #d23600 transparent}.sm-dox span.scroll-down:hover span.scroll-down-arrow{border-color:#d23600 transparent transparent transparent}.sm-dox span.scroll-up-arrow,.sm-dox span.scroll-down-arrow{position:absolute;top:0;left:50%;margin-left:-6px;width:0;height:0;overflow:hidden;border-width:6px;border-style:dashed dashed solid dashed;border-color:transparent transparent #555 transparent}.sm-dox span.scroll-down-arrow{top:8px;border-style:solid 
dashed dashed dashed;border-color:#555 transparent transparent transparent}.sm-dox.sm-rtl a.has-submenu{padding-right:12px;padding-left:24px}.sm-dox.sm-rtl a span.sub-arrow{right:auto;left:12px}.sm-dox.sm-rtl.sm-vertical a.has-submenu{padding:10px 20px}.sm-dox.sm-rtl.sm-vertical a span.sub-arrow{right:auto;left:8px;border-style:dashed solid dashed dashed;border-color:transparent #555 transparent transparent}.sm-dox.sm-rtl>li>ul:before{left:auto;right:30px}.sm-dox.sm-rtl>li>ul:after{left:auto;right:31px}.sm-dox.sm-rtl ul a.has-submenu{padding:10px 20px!important}.sm-dox.sm-rtl ul a span.sub-arrow{right:auto;left:8px;border-style:dashed solid dashed dashed;border-color:transparent #555 transparent transparent}.sm-dox.sm-vertical{padding:10px 0;-moz-border-radius:5px;-webkit-border-radius:5px;border-radius:5px}.sm-dox.sm-vertical a{padding:10px 20px}.sm-dox.sm-vertical a:hover,.sm-dox.sm-vertical a:focus,.sm-dox.sm-vertical a:active,.sm-dox.sm-vertical a.highlighted{background:#fff}.sm-dox.sm-vertical a.disabled{background-image:url("tab_b.png")}.sm-dox.sm-vertical a span.sub-arrow{right:8px;top:50%;margin-top:-5px;border-width:5px;border-style:dashed dashed dashed solid;border-color:transparent transparent transparent #555}.sm-dox.sm-vertical>li>ul:before,.sm-dox.sm-vertical>li>ul:after{display:none}.sm-dox.sm-vertical ul a{padding:10px 20px}.sm-dox.sm-vertical ul a:hover,.sm-dox.sm-vertical ul a:focus,.sm-dox.sm-vertical ul a:active,.sm-dox.sm-vertical ul a.highlighted{background:#eee}.sm-dox.sm-vertical ul a.disabled{background:#fff}} -------------------------------------------------------------------------------- /inc/enc_cp932.hpp: -------------------------------------------------------------------------------- 1 | #ifndef ENC_CP932_HPP 2 | #define ENC_CP932_HPP 3 | 4 | #include "enc_shiftjis.hpp" 5 | 6 | namespace motoi 7 | { 8 | 9 | /** 10 | * @brief Determines if input data points to a valid MS Code Page 932 code point 11 | * 12 | * @return size_t Returns 0 if 
not a valid sequence; otherwise returns the 13 | * number of bytes in the code point sequence 14 | * 15 | */ 16 | class cp932_validator : public shiftjis_validator 17 | { 18 | public: 19 | cp932_validator() 20 | { 21 | m_iconv_code = "CP932"; 22 | } 23 | 24 | uint is_valid(byte_t const * data) const override; 25 | ~cp932_validator() override = default; 26 | }; 27 | 28 | } // namespace motoi 29 | 30 | #endif -------------------------------------------------------------------------------- /inc/enc_eucjp.hpp: -------------------------------------------------------------------------------- 1 | #ifndef ENC_EUCJP_HPP 2 | #define ENC_EUCJP_HPP 3 | 4 | #include "jis_validator.hpp" 5 | 6 | namespace motoi 7 | { 8 | 9 | /** 10 | * @brief Determines if input data points to a valid EUC JP code point 11 | * 12 | * @return size_t Returns 0 if not a valid sequence; otherwise returns the 13 | * number of bytes in the code point sequence 14 | * 15 | */ 16 | class eucjp_validator : public jis_validator 17 | { 18 | private: 19 | constexpr static char const * m_iconvcode = "EUC-JP"; 20 | 21 | public: 22 | eucjp_validator() : 23 | jis_validator(3, m_iconvcode) {}; 24 | uint is_valid(byte_t const * data) const override; 25 | ~eucjp_validator() override = default; 26 | }; 27 | 28 | } // namespace motoi 29 | #endif 30 | -------------------------------------------------------------------------------- /inc/enc_shiftjis.hpp: -------------------------------------------------------------------------------- 1 | #ifndef ENC_SHIFTJIS_HPP 2 | #define ENC_SHIFTJIS_HPP 3 | #include "jis_validator.hpp" 4 | 5 | namespace motoi 6 | { 7 | 8 | /** 9 | * @brief Determines if input data points to a valid Shift-JIS code point 10 | * 11 | * @return size_t Returns 0 if not a valid sequence; otherwise returns the 12 | * number of bytes in the code point sequence 13 | * 14 | * @details This supports traditional Shift-JIS, which encompasses JIS X 0201 and 15 | * JIS X 0208 character sets. 
It does not currently support the JIS X 0213 16 | * extension set. 17 | */ 18 | class shiftjis_validator : public jis_validator 19 | { 20 | private: 21 | constexpr static char const * m_iconvcode = "SHIFT-JIS"; 22 | 23 | public: 24 | shiftjis_validator() : 25 | jis_validator(2, m_iconvcode) {}; 26 | uint is_valid(byte_t const * data) const override; 27 | ~shiftjis_validator() override = default; 28 | ; 29 | }; 30 | 31 | } // namespace motoi 32 | #endif -------------------------------------------------------------------------------- /inc/enc_validator.hpp: -------------------------------------------------------------------------------- 1 | #ifndef ENCODING_HPP 2 | #define ENCODING_HPP 3 | 4 | #include "types.hpp" 5 | 6 | namespace motoi 7 | { 8 | /*! 9 | * \brief Abstract for encoding validator classes 10 | */ 11 | class encoding_validator 12 | { 13 | protected: 14 | uint m_max_seq_len; 15 | 16 | encoding_validator(uint max_seq_len) : 17 | m_max_seq_len {max_seq_len} 18 | { 19 | } 20 | 21 | public: 22 | encoding_validator() = delete; 23 | virtual ~encoding_validator() = default; 24 | 25 | /*! 26 | * \brief Determines if the given bytes are a valid byte sequence for the encoding. 27 | * Returns the number of valid bytes if true. 
28 | */ 29 | virtual uint is_valid(byte_t const * data) const = 0; 30 | 31 | uint operator()(byte_t const * data) const 32 | { 33 | return is_valid(data); 34 | } 35 | 36 | [[nodiscard]] uint max_seq_len() const 37 | { 38 | return m_max_seq_len; 39 | } 40 | }; 41 | } // namespace motoi 42 | 43 | #endif 44 | -------------------------------------------------------------------------------- /inc/filesys.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file filesys.hpp 3 | * @author Motoi Productions (Damian Rogers damian@motoi.pro) 4 | * @brief File system/path utilities 5 | * 6 | * Updates: 7 | * 20211214 Initial 8 | * 20220420 Converted to basic_string, added path parsing functions 9 | * 20220720 Change stringstream to ostringstream; added file_size & concat_paths 10 | */ 11 | 12 | #ifndef __MOTOI__FILESYS_HPP 13 | #define __MOTOI__FILESYS_HPP 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | template 21 | std::basic_string concat_paths(StringT... paths) 22 | { 23 | size_t path_count = sizeof...(paths); 24 | size_t u = 0; 25 | ostringstream oss; 26 | for(auto const & s : { paths... 
}) 27 | { 28 | ++u; 29 | if(s.empty()) 30 | continue; 31 | oss << s; 32 | if(s.back() != '/' && u < path_count) 33 | oss << '/'; 34 | } 35 | 36 | return oss.str(); 37 | } 38 | // Returns the ::stat() record for path; throws std::runtime_error (with the strerror text) if the call fails. 39 | template 40 | struct stat stat(std::basic_string const & path) 41 | { 42 | struct stat status; 43 | if(::stat(path.c_str(), &status) != 0) 44 | { 45 | std::basic_ostringstream ss; 46 | ss << "Could not open path " << path << ": " << strerror(errno); 47 | throw std::runtime_error(ss.str()); 48 | } 49 | return status; 50 | } 51 | // Returns true when ::stat() succeeds on path, false otherwise; never throws on a missing path. 52 | template 53 | bool exists(std::basic_string const & path) 54 | { 55 | struct stat status; 56 | return (::stat(path.c_str(), &status) == 0); 57 | } 58 | // Returns st_size for path; throws std::runtime_error if the path does not exist or ::stat() fails. 59 | template 60 | size_t file_size(std::basic_string const & path) 61 | { 62 | if(!exists(path)) 63 | { 64 | std::basic_ostringstream oss; 65 | oss << "Path " << path << " does not exist"; 66 | throw std::runtime_error(oss.str()); 67 | } 68 | 69 | struct stat status; 70 | if(::stat(path.c_str(), &status) != 0) 71 | { 72 | std::basic_ostringstream oss; 73 | oss << "Could not open path " << path << ": " << strerror(errno); 74 | throw std::runtime_error(oss.str()); 75 | } 76 | return status.st_size; 77 | } 78 | 79 | template 80 | std::ifstream 81 | ifstream_checked(std::basic_string const & path, 82 | std::basic_string const & purpose = L"read") 83 | { 84 | std::ifstream ifs(path); 85 | if(!ifs.good()) 86 | { 87 | std::basic_ostringstream oss; 88 | oss << "Could not open input path \"" << path << "\" for " << purpose 89 | << ": " << strerror(errno); 90 | throw std::runtime_error(oss.str()); 91 | } 92 | return ifs; 93 | } 94 | 95 | template 96 | std::ofstream 97 | ofstream_checked(std::basic_string const & path, 98 | std::basic_string const & purpose = L"write") 99 | { 100 | std::ofstream ofs(path); 101 | if(!ofs.good()) 102 | { 103 | std::basic_ostringstream oss; 104 | oss << "Could not open output path \"" << path << "\" for " << purpose 105 | << ": " << strerror(errno); 106 | throw std::runtime_error(oss.str()); 107
| } 108 | return ofs; 109 | } 110 | 111 | template 112 | std::basic_string 113 | strip_extension(std::basic_string const & path) 114 | { 115 | auto i_at { path.find_last_of('.') }; 116 | if(i_at == std::string::npos) 117 | return path; 118 | return path.substr(0, i_at); 119 | } 120 | 121 | template 122 | std::basic_string 123 | filename_from_path(std::basic_string const & path) 124 | { 125 | auto i_at { path.find_last_of('/') }; 126 | if(i_at == std::string::npos) 127 | return path; 128 | return path.substr(0, i_at); 129 | } 130 | 131 | #endif 132 | -------------------------------------------------------------------------------- /inc/fstreams.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file fstreams.hpp 3 | * @author Damian Rogers (damian@motoi.pro) 4 | * @copyright Motoi Productions / Released under MIT License 5 | * @brief Checked file streams 6 | * 7 | * Updates: 8 | * 20220916 Initial 9 | * 20221118 Changed exception to std::system_error 10 | */ 11 | 12 | #ifndef __MOTOI__FSTREAMS_HPP 13 | #define __MOTOI__FSTREAMS_HPP 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | namespace motoi 21 | { 22 | 23 | template 24 | std::ifstream ifstream_checked( 25 | std::basic_string const & path, std::basic_string const & purpose = "read") 26 | { 27 | std::ifstream ifs(path); 28 | if (! ifs.good()) 29 | { 30 | std::basic_ostringstream oss; 31 | oss << "Could not open input path \"" << path << "\" for " << purpose; 32 | throw std::system_error(errno, std::system_category(), oss.str()); 33 | } 34 | return ifs; 35 | } 36 | 37 | template 38 | std::ofstream ofstream_checked( 39 | std::basic_string const & path, std::basic_string const & purpose = "write") 40 | { 41 | std::ofstream ofs(path); 42 | if (! 
ofs.good()) 43 | { 44 | std::basic_ostringstream oss; 45 | oss << "Could not open output path \"" << path << "\" for " << purpose; // std::system_error appends the errno description itself 46 | throw std::system_error(errno, std::system_category(), oss.str()); 47 | } 48 | return ofs; 49 | } 50 | } // namespace motoi 51 | 52 | #endif 53 | -------------------------------------------------------------------------------- /inc/iconv_encoding.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __MOTOI__ICONV_ENCODING_HPP 2 | #define __MOTOI__ICONV_ENCODING_HPP 3 | 4 | namespace motoi 5 | { 6 | 7 | /** 8 | * @brief Abstract class for specifying an iconv encoding code 9 | * 10 | */ 11 | class iconv_encoding 12 | { 13 | protected: 14 | char const * m_iconv_code; 15 | 16 | iconv_encoding(char const * iconv_code) : 17 | m_iconv_code {iconv_code} 18 | { 19 | } 20 | 21 | public: 22 | iconv_encoding() = delete; 23 | virtual ~iconv_encoding() = default; 24 | 25 | [[nodiscard]] char const * iconv_code() const 26 | { 27 | return m_iconv_code; 28 | } 29 | }; 30 | 31 | } // namespace motoi 32 | #endif 33 | -------------------------------------------------------------------------------- /inc/jis_validator.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __MOTOI__JIS_VALIDATOR_HPP 2 | #define __MOTOI__JIS_VALIDATOR_HPP 3 | 4 | #include "enc_validator.hpp" 5 | #include "iconv_encoding.hpp" 6 | 7 | namespace motoi 8 | { 9 | /** 10 | * @brief Abstract class for JIS character set encoding validators 11 | */ 12 | class jis_validator : public encoding_validator, public iconv_encoding 13 | { 14 | protected: 15 | bool m_include_crlf {false}; 16 | bool m_skip_jis0201 {false}; 17 | 18 | jis_validator(size_t max_seq_len, char const * iconv_code) : 19 | encoding_validator(max_seq_len), 20 | iconv_encoding(iconv_code) 21 | { 22 | } 23 | 24 | public: 25 | jis_validator() = delete; 26 | 27 | void include_crlf(bool include_crlf) 28 | { 29
| m_include_crlf = include_crlf; 30 | } 31 | 32 | [[nodiscard]] bool include_crlf() const 33 | { 34 | return m_include_crlf; 35 | } 36 | 37 | void skip_jis0201(bool skip_jis0201) 38 | { 39 | m_skip_jis0201 = skip_jis0201; 40 | } 41 | 42 | [[nodiscard]] bool skip_jis0201() const 43 | { 44 | return m_skip_jis0201; 45 | } 46 | }; 47 | 48 | } // namespace motoi 49 | 50 | #endif 51 | -------------------------------------------------------------------------------- /inc/jstrings.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __MOTOI__JSTRINGS_HPP 2 | #define __MOTOI__JSTRINGS_HPP 3 | 4 | #include "enc_validator.hpp" 5 | #include "types.hpp" 6 | #include 7 | #include 8 | #include 9 | 10 | namespace motoi 11 | { 12 | 13 | constexpr static size_t const default_match_length {5}; 14 | 15 | // buffer size is 4mb 16 | constexpr static size_t const buffer_size {0x100000 * 4}; 17 | 18 | /** 19 | * @brief Contains an array of data identified as a valid 20 | * string of text for a certain encoding. First value is 21 | * the offset in the source data in which the string was 22 | * found; second value is the vector of data. 
23 | */ 24 | using found_string = std::pair>; 25 | 26 | /** 27 | * @brief Finds strings matching a given encoding within binary data 28 | */ 29 | std::vector find( 30 | std::istream & input, encoding_validator const & validator, size_t const minimum_match_length = default_match_length); 31 | 32 | } // namespace motoi 33 | 34 | #endif 35 | -------------------------------------------------------------------------------- /inc/types.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file types.hpp 3 | * @author Damian Rogers (damian@motoi.pro) 4 | * @copyright Motoi Productions / Released under MIT License 5 | * @brief Commonly used typedefs 6 | * 7 | * Updates: 8 | * 20200101 Initial 9 | */ 10 | 11 | #ifndef __MOTOI__TYPES_HPP 12 | #define __MOTOI__TYPES_HPP 13 | 14 | //#include 15 | #include 16 | #include 17 | 18 | using uchar = unsigned char; 19 | using uint = unsigned int; 20 | using ushort = unsigned short; 21 | using ulong = unsigned long; 22 | 23 | using u8 = uint8_t; 24 | using u16 = uint16_t; 25 | using u32 = uint32_t; 26 | 27 | using s8 = int8_t; 28 | using s16 = int16_t; 29 | using s32 = int32_t; 30 | 31 | using byte_t = uint8_t; 32 | 33 | template 34 | using uptr = std::unique_ptr; 35 | template 36 | using sptr = std::shared_ptr; 37 | 38 | #endif 39 | -------------------------------------------------------------------------------- /inc/uniconv.hpp: -------------------------------------------------------------------------------- 1 | #ifndef ENC_CP932_HPP 2 | #define ENC_CP932_HPP 3 | 4 | #include "enc_shiftjis.hpp" 5 | 6 | namespace motoi 7 | { 8 | 9 | /** 10 | * @brief Determines if input data points to a valid MS Code Page 932 code point 11 | * 12 | * @return size_t Returns 0 if not a valid sequence; otherwise returns the 13 | * number of bytes in the code point sequence 14 | * 15 | */ 16 | class cp932_validator : public shiftjis_validator 17 | { 18 | public: 19 | cp932_validator() 20 | { 21 | m_iconv_code 
= "CP932"; 22 | } 23 | 24 | uint is_valid(byte_t const * data) const override; 25 | ~cp932_validator() override = default; 26 | }; 27 | 28 | } // namespace motoi 29 | 30 | #endif /** \ 31 | * @file uniconv.hpp \ 32 | * @author Damian Rogers (damian@motoi.pro) \ 33 | * @copyright Motoi Productions / Released under MIT License \ 34 | * @brief Wrapper for iconv to convert to utf8 \ 35 | * \ 36 | * Updates: \ 37 | * 20221120 Initial \ 38 | */ 39 | 40 | #ifndef __MOTOI__UNICONV_HPP 41 | #define __MOTOI__UNICONV_HPP 42 | 43 | #include 44 | #include 45 | #include 46 | #include 47 | #include 48 | #include 49 | 50 | namespace motoi 51 | { 52 | class uniconv 53 | { 54 | private: 55 | constexpr static char const * m_convcode = "UTF-8"; 56 | iconv_t m_conv; 57 | 58 | public: 59 | uniconv(uniconv const &) = delete; 60 | uniconv & operator=(uniconv const &) = delete; 61 | uniconv(uniconv &&) = default; 62 | uniconv & operator=(uniconv &&) = default; 63 | 64 | uniconv(const char * fromcode) 65 | { 66 | m_conv = iconv_open(uniconv::m_convcode, fromcode); 67 | 68 | if (m_conv == (iconv_t) -1) 69 | { 70 | std::ostringstream oss; 71 | oss << "Could not open converter using encoding " << fromcode; 72 | throw std::system_error(errno, std::generic_category(), oss.str()); 73 | } 74 | } 75 | 76 | ~uniconv() 77 | { 78 | iconv_close(m_conv); 79 | } 80 | 81 | std::string convert(char * in, size_t in_size) 82 | { 83 | std::string out(in_size * 2, '\0'); 84 | 85 | char * p_in = in; 86 | char * p_out = (char *) out.data(); 87 | size_t out_size = out.size(); 88 | size_t result; 89 | 90 | // clang-format off 91 | while ((result = iconv 92 | (m_conv, &p_in, &in_size, &p_out, &out_size) 93 | == -1)) 94 | // clang-format on 95 | { 96 | // received some kind of error status 97 | if (errno == EILSEQ || errno == EINVAL) 98 | { 99 | ptrdiff_t pos = p_in - in; 100 | std::ostringstream oss; 101 | oss << "Encountered invalid byte sequence at position " << pos; 102 | throw std::system_error(errno, 
std::generic_category(), oss.str()); 103 | } 104 | 105 | // output buffer out of space, resize and retry 106 | if (errno == E2BIG) 107 | { 108 | ptrdiff_t pos = p_out - out.data(); 109 | out.resize(out.size() * 2); 110 | p_out = out.data() + pos; 111 | out_size = out.size() - pos; 112 | } 113 | } 114 | 115 | out.resize(p_out - out.data()); 116 | 117 | return out; 118 | } 119 | 120 | std::string convert(std::vector & in) 121 | { 122 | return convert(in.data(), in.size()); 123 | } 124 | 125 | std::string convert(std::vector & in) 126 | { 127 | return convert((char *) in.data(), in.size()); 128 | } 129 | 130 | std::string convert(std::string & in) 131 | { 132 | return convert(in.data(), in.size()); 133 | } 134 | }; 135 | } // namespace motoi 136 | 137 | #endif 138 | -------------------------------------------------------------------------------- /inc/usage.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file usage.hpp 3 | * @author Motoi Productions (Damian Rogers damian@motoi.pro) 4 | * @brief Display program options/usage 5 | * 6 | * Updates: 7 | * 20220415 Initial 8 | * 20220722 Using const in show_usage parameters 9 | * 20220914 Added show_version 10 | * 20221118 Added copyright field; change to non-wide characters to properly support 11 | * UTF-8; updates to match changes to match app.hpp.cfg 12 | */ 13 | 14 | #ifndef __MOTOI__USAGE_HPP 15 | #define __MOTOI__USAGE_HPP 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | namespace motoi 23 | { 24 | struct option_details 25 | { 26 | bool const required; 27 | char const * desc; 28 | char const * arg_type; 29 | }; 30 | 31 | /** 32 | * @brief Displays program version 33 | * 34 | * @param output stream to write the version 35 | */ 36 | void show_version(std::ostream & output); 37 | 38 | /** 39 | * @brief Displays program options and usage 40 | * 41 | * @param opts array of @c option structs; final entry should be all zero 42 | * @param details array 
of @c option_details structs; must be the same size and 43 | * order as @c opts 44 | * @param output stream to write the usage 45 | */ 46 | void show_usage(option const * opts, option_details const * details, std::ostream & output = std::cout); 47 | 48 | } // namespace motoi 49 | 50 | #endif 51 | -------------------------------------------------------------------------------- /src/app.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @author Damian R (damian@motoi.pro) 3 | * @brief A tool for finding JIS-based Japanese text in binary data. 4 | * @version 1.6 5 | * 6 | * @copyright ©2018 Motoi Productions / Released under MIT License 7 | * 8 | */ 9 | 10 | #ifndef __MOTOI__APP_HPP 11 | #define __MOTOI__APP_HPP 12 | 13 | /* 14 | These values should be set within CMakeLists.txt 15 | */ 16 | namespace app 17 | { 18 | static unsigned int const version_major {1}; 19 | static unsigned int const version_minor {6}; 20 | static unsigned int const version_patch {}; 21 | static char const * version {"1.6"}; 22 | 23 | static char const * name {"jstrings"}; 24 | static char const * brief {"A tool for finding JIS-based Japanese text in binary data."}; 25 | static char const * copyright {"©2018 Motoi Productions / Released under MIT License"}; 26 | static char const * contact {"Damian R (damian@motoi.pro)"}; 27 | static char const * website {"https://github.com/drojaazu"}; 28 | } // namespace app 29 | 30 | #endif 31 | -------------------------------------------------------------------------------- /src/app.hpp.cfg: -------------------------------------------------------------------------------- 1 | /** 2 | * @author @PROJECT_CONTACT@ 3 | * @brief @PROJECT_BRIEF@ 4 | * @version @PROJECT_VERSION@ 5 | * 6 | * @copyright @PROJECT_COPYRIGHT@ 7 | * 8 | */ 9 | 10 | #ifndef __MOTOI__APP_HPP 11 | #define __MOTOI__APP_HPP 12 | 13 | /* 14 | These values should be set within CMakeLists.txt 15 | */ 16 | namespace app 17 | { 18 | static unsigned 
int const version_major {@PROJECT_VERSION_MAJOR@}; 19 | static unsigned int const version_minor {@PROJECT_VERSION_MINOR@}; 20 | static unsigned int const version_patch {@PROJECT_VERSION_PATCH@}; 21 | static char const * version {"@PROJECT_VERSION@"}; 22 | 23 | static char const * name {"@PROJECT_NAME@"}; 24 | static char const * brief {"@PROJECT_BRIEF@"}; 25 | static char const * copyright {"@PROJECT_COPYRIGHT@"}; 26 | static char const * contact {"@PROJECT_CONTACT@"}; 27 | static char const * website {"@PROJECT_WEBSITE@"}; 28 | } // namespace app 29 | 30 | #endif 31 | -------------------------------------------------------------------------------- /src/enc_cp932.cpp: -------------------------------------------------------------------------------- 1 | #include "enc_cp932.hpp" 2 | 3 | namespace motoi 4 | { 5 | 6 | uint cp932_validator::is_valid(byte_t const * data) const 7 | { 8 | u8 valid_count = shiftjis_validator::is_valid(data); 9 | if (valid_count > 0) 10 | return valid_count; 11 | else 12 | { 13 | u8 c_hi {*data}; 14 | u8 c_lo {*(data + 1)}; 15 | 16 | /* 17 | ku 13 - lead byte 0x87 - NEC tokushu moji 18 | 0x40 to 0x5d, 0x5f to 0x75, 0x7e, 0x80 to 0x8f, 0x93, 0x94, 19 | 0x98, 0x99 ku 89-92 - lead byte 0xed, 0xee - NEC sentei IBM kakuchou moji 20 | 0x40 to 0x7e, 0x80 to 0xfc 21 | ku 115-119 - lead byte 0xfa to 0xfc - IBM kakuchou moji 22 | 0x40 to 0x7e, 0x80 to 0xfc (except 0xfc: 0x40 to 0x4b) 23 | */ 24 | switch (c_hi) 25 | { 26 | case 0x87: 27 | if (((c_lo >= 0x40) & (c_lo <= 0x5d)) || ((c_lo >= 0x5f) & (c_lo <= 0x75)) || (c_lo == 0x7e) || 28 | ((c_lo >= 0x80) & (c_lo <= 0x8f)) || (c_lo == 0x93) || (c_lo == 0x94) || (c_lo == 0x98) || (c_lo == 0x99)) 29 | return 2; 30 | break; 31 | case 0xed: 32 | case 0xee: 33 | case 0xfa: 34 | case 0xfb: 35 | // 0x7f should already be excluded from the base shiftjis check 36 | if ((c_lo >= 0x40) & (c_lo <= 0xfc)) 37 | return 2; 38 | case 0xfc: 39 | if ((c_lo >= 0x40) & (c_lo <= 0x4b)) 40 | return 2; 41 | } 42 | } 43 | return 
0; 44 | } 45 | } // namespace motoi 46 | -------------------------------------------------------------------------------- /src/enc_eucjp.cpp: -------------------------------------------------------------------------------- 1 | #include "enc_eucjp.hpp" 2 | 3 | namespace motoi 4 | { 5 | 6 | uint eucjp_validator::is_valid(byte_t const * data) const 7 | { 8 | 9 | u8 c_hi {*data}; 10 | 11 | // ASCII except control characters (including tab and conditionally newlines) 12 | if ((c_hi == 0x09) || ((c_hi >= 0x20) & (c_hi <= 0x7e)) || (m_include_crlf && (c_hi == 0x0a))) 13 | return 1; 14 | 15 | u8 c_lo {*(data + 1)}; 16 | 17 | if (m_include_crlf && ((c_hi == 0x0d) && (c_lo == 0x0a))) 18 | return 2; 19 | 20 | /* 21 | JIS X 0201 22 | the raw 0201 code prefixed with 0x8E 23 | */ 24 | if (! m_skip_jis0201 && (c_hi == 0x8e)) 25 | { 26 | if ((c_lo >= 0xa1) & (c_lo <= 0xdf)) 27 | return 2; 28 | } 29 | /* 30 | JIS X 0208 31 | standard 0208 code with top bit set 32 | Partial ku 33 | 0xa2 - 0xa1 to 0xae, 0xba to 0xc1, 0xca to 0xd0, 0xdc to 0xea, 0xf2 to 0xf9, 0xfe 34 | 0xa3 - 0xb0 to 0xb9, 0xc1 to 0xda, 0xe1 to 0xfa 35 | 0xa4 - 0xa1 to 0xf3 36 | 0xa5 - 0xa1 to 0xf6 37 | 0xa6 - 0xa1 to 0xb8, 0xc1 to 0xd8 38 | 0xa7 - 0xa1 to 0xc1, 0xd1 to 0xf1 39 | 0xa8 - 0xa1 to 0xc0 40 | 0xcf - 0xa1 to 0xd3 41 | 0xf4 - 0xa1 to 0xa6 42 | */ 43 | switch (c_hi) 44 | { 45 | case 0xa2: 46 | if ((c_lo >= 0xa1) & (c_lo <= 0xae) || (c_lo >= 0xba) & (c_lo <= 0xc1) || (c_lo >= 0xca) & (c_lo <= 0xd0) || 47 | (c_lo >= 0xdc) & (c_lo <= 0xea) || (c_lo >= 0xf2) & (c_lo <= 0xf9) || (c_lo == 0xfe)) 48 | return 2; 49 | return false; 50 | case 0xa3: 51 | if ((c_lo >= 0xb0) & (c_lo <= 0xb9) || (c_lo >= 0xc1) & (c_lo <= 0xda) || (c_lo >= 0xe1) & (c_lo <= 0xfa)) 52 | return 2; 53 | return false; 54 | case 0xa4: 55 | if ((c_lo >= 0xa1) & (c_lo <= 0xf3)) 56 | return 2; 57 | return false; 58 | case 0xa5: 59 | if ((c_lo >= 0xa1) & (c_lo <= 0xf6)) 60 | return 2; 61 | return false; 62 | case 0xa6: 63 | if ((c_lo >= 0xa1) & (c_lo 
<= 0xb8) || (c_lo >= 0xc1) & (c_lo <= 0xd8)) 64 | return 2; 65 | return false; 66 | case 0xa7: 67 | if ((c_lo >= 0xa1) & (c_lo <= 0xc1) || (c_lo >= 0xd1) & (c_lo <= 0xf1)) 68 | return 2; 69 | return false; 70 | case 0xa8: 71 | if ((c_lo >= 0xa1) & (c_lo <= 0xc0)) 72 | return 2; 73 | return false; 74 | case 0xcf: 75 | if ((c_lo >= 0xa1) & (c_lo <= 0xd3)) 76 | return 2; 77 | return false; 78 | case 0xf4: 79 | if ((c_lo >= 0xa1) & (c_lo <= 0xa6)) 80 | return 2; 81 | return false; 82 | } 83 | 84 | /* 85 | Full ku (lo bytes 0xa1 to 0xfe) 86 | 0xa1, 0xb0 to 0xce, 0xd0 to 0xf3 87 | */ 88 | if (((c_hi == 0xa1) || ((c_hi >= 0xb0) & (c_hi <= 0xce)) || ((c_hi >= 0xd0) & (c_hi <= 0xf3))) && 89 | ((c_lo >= 0xa1) & (c_lo <= 0xfe))) 90 | return 2; 91 | 92 | /* 93 | JIS X 0212 94 | 0208 extension, so only a few ku are present 95 | 0208 code prefixed by 0x8f 96 | */ 97 | if (c_hi == 0x8f) 98 | { 99 | u8 c_md = c_lo; 100 | c_lo = *(data + 2); 101 | 102 | /* 103 | Partial ku: 104 | 0xa2 - 0xaf to 0xb9, 0xc2 to 0xc4, 0xeb to 0xf1 105 | 0xa6 - 0xe1 to 0xe5, 0xe7, 0xe9, 0xea, 0xec, 0xf1 to 0xfc 106 | 0xa7 - 0xc2 to 0xce, 0xf2 to 0xfe 107 | 0xa9 - 0xa1, 0xa2, 0xa4, 0xa6, 0xa8, 0xa9, 0xab to 0xad, 0xaf, 0xb0, 0xc1 to 0xd0 108 | 0xaa - 0xa1 to 0xb8, 0xba to 0xf7 109 | 0xab - 0xa1 to 0xbb, 0xbd to 0xc3, 0xc5 to 0xf7 110 | 0xed - 0xa1 to 0xe3 111 | */ 112 | switch (c_md) 113 | { 114 | case 0xa2: 115 | if (((c_lo >= 0xaf) & (c_lo <= 0xb9)) || ((c_lo >= 0xc2) & (c_lo <= 0xc4)) || ((c_lo >= 0xeb) & (c_lo <= 0xf1))) 116 | return 3; 117 | return false; 118 | case 0xa6: 119 | if (((c_lo >= 0xe1) & (c_lo <= 0xe5)) || (c_lo == 0xe7) || (c_lo == 0xe9) || (c_lo == 0xea) || (c_lo == 0xec) || 120 | ((c_lo >= 0xf1) & (c_lo <= 0xfc))) 121 | return 3; 122 | return false; 123 | case 0xa7: 124 | if (((c_lo >= 0xc2) & (c_lo <= 0xce)) || ((c_lo >= 0xf2) & (c_lo <= 0xfe))) 125 | return 3; 126 | return false; 127 | case 0xa9: 128 | if ((c_lo == 0xa1) || (c_lo == 0xa2) || (c_lo == 0xa4) || (c_lo == 0xa6) || (c_lo 
== 0xa8) || (c_lo == 0xa9) || 129 | ((c_lo >= 0xab) & (c_lo <= 0xad)) || (c_lo == 0xaf) || (c_lo == 0xb0) || ((c_lo >= 0xc1) & (c_lo <= 0xd0))) 130 | return 3; 131 | return false; 132 | case 0xaa: 133 | if (((c_lo >= 0xa1) & (c_lo <= 0xb8)) || ((c_lo >= 0xba) & (c_lo <= 0xf7))) 134 | return 3; 135 | return false; 136 | case 0xab: 137 | if (((c_lo >= 0xa1) & (c_lo <= 0xbb)) || ((c_lo >= 0xbd) & (c_lo <= 0xc3)) || ((c_lo >= 0xc5) & (c_lo <= 0xf7))) 138 | return 3; 139 | return false; 140 | case 0xed: 141 | if ((c_lo >= 0xa1) & (c_lo <= 0xe3)) 142 | return 3; 143 | return false; 144 | } 145 | 146 | // Full ku (lo bytes 0xa1 to 0xfe) 147 | // 0xb0 to 0xec 148 | if (((c_md >= 0xb0) & (c_md <= 0xec)) && ((c_lo >= 0xa1) & (c_lo <= 0xfe))) 149 | return 3; 150 | } 151 | return false; 152 | } 153 | 154 | } // namespace motoi 155 | -------------------------------------------------------------------------------- /src/enc_shiftjis.cpp: -------------------------------------------------------------------------------- 1 | #include "enc_shiftjis.hpp" 2 | 3 | namespace motoi 4 | { 5 | 6 | uint shiftjis_validator::is_valid(byte_t const * data) const 7 | { 8 | /* 9 | JIS X 0201 - 8-bit characters (including 7-bit ASCII) 10 | excludes non-printable (control code) and reserved bytes 11 | (but include tab (0x09)) 12 | */ 13 | u8 c_hi {*data}; 14 | // clang-format off 15 | if ((c_hi == 0x09) || (c_hi >= 0x20) & (c_hi <= 0x7e) 16 | || ((! 
m_skip_jis0201) && ((c_hi >= 0xa1) & (c_hi <= 0xdf))) 17 | ) 18 | return 1; 19 | // clang-format on 20 | 21 | // JIS X 0208 - 16 bit characters 22 | u8 c_lo {*(data + 1)}; 23 | 24 | if (m_include_crlf && (c_hi == 0x0a || (c_hi == 0x0d && c_lo == 0x0a))) 25 | return 1; 26 | 27 | // sjis lower byte can never be these values 28 | if ((c_lo >= 0x0) & (c_lo <= 0x3f) || (c_lo == 0x7f) || (c_lo >= 0xfd) & (c_lo <= 0xff)) 29 | return 0; 30 | 31 | /* 32 | Partial fields (always excluding 0x7f) 33 | 0x81 - 0x40 to 0xac, 0xb8 to 0xbf, 0xc8 to 0xce, 0xda to 0xe8, 0xf0 to 34 | 0xf7, 0xfc 0x82 - 0x4f to 0x58, 0x60 to 0x79, 0x81 to 0x9a, 0x9f to 0xf1 35 | 0x83 - 0x40 to 0x96, 0x9f to 0xb6, 0xbf to 0xd6 36 | 0x84 - 0x40 to 0x60, 0x70 to 0x91, 0x9f to 0xbe 37 | 0x88 - 0x9f to 0xfc 38 | 0x98 - 0x40 to 0x72, 0x9f to 0xfc 39 | 0xea - 0x40 to 0xa4 40 | */ 41 | switch (c_hi) 42 | { 43 | case 0x81: 44 | if ((c_lo >= 0x40) & (c_lo <= 0xac) || (c_lo >= 0xb8) & (c_lo <= 0xbf) || (c_lo >= 0xc8) & (c_lo <= 0xce) || 45 | (c_lo >= 0xda) & (c_lo <= 0xe8) || (c_lo >= 0xf0) & (c_lo <= 0xf7) || (c_lo == 0xfc)) 46 | return 2; 47 | return 0; 48 | case 0x82: 49 | if ((c_lo >= 0x4f) & (c_lo <= 0x58) || (c_lo >= 0x60) & (c_lo <= 0x79) || (c_lo >= 0x81) & (c_lo <= 0x9a) || 50 | (c_lo >= 0x9f) & (c_lo <= 0xf1)) 51 | return 2; 52 | return 0; 53 | case 0x83: 54 | if ((c_lo >= 0x40) & (c_lo <= 0x96) || (c_lo >= 0x9f) & (c_lo <= 0xb6) || (c_lo >= 0xbf) & (c_lo <= 0xd6)) 55 | return 2; 56 | return 0; 57 | case 0x84: 58 | if ((c_lo >= 0x40) & (c_lo <= 0x60) || (c_lo >= 0x70) & (c_lo <= 0x91) || (c_lo >= 0x9f) & (c_lo <= 0xbe)) 59 | return 2; 60 | return 0; 61 | case 0x88: 62 | if ((c_lo >= 0x9f) & (c_lo <= 0xfc)) 63 | return 2; 64 | return 0; 65 | case 0x98: 66 | if ((c_lo >= 0x40) & (c_lo <= 0x72) || (c_lo >= 0x9f) & (c_lo <= 0xfc)) 67 | return 2; 68 | return 0; 69 | case 0xea: 70 | if ((c_lo >= 0x40) & (c_lo <= 0xa4)) 71 | return 2; 72 | return 0; 73 | } 74 | 75 | /* 76 | Full fields (0x40 to 0xfc, excluding 
0x7f) 77 | 0x89 to 0x97, 0x99 to 0x9f, 0xe0 to 0xe9 78 | */ 79 | // clang-format off 80 | if ( 81 | (((c_hi >= 0x89) & (c_hi <= 0x97)) 82 | || ((c_hi >= 0x99) & (c_hi <= 0x9f)) 83 | || ((c_hi >= 0xe0) & (c_hi <= 0xe9)) 84 | ) 85 | && ((c_lo >= 0x40) & (c_lo <= 0xfc)) 86 | ) 87 | return 2; 88 | // clang-format on 89 | return 0; 90 | } 91 | 92 | } // namespace motoi 93 | -------------------------------------------------------------------------------- /src/jstrings.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "jstrings.hpp" 3 | #include "enc_validator.hpp" 4 | #include 5 | #include 6 | #include 7 | 8 | namespace motoi 9 | { 10 | 11 | using namespace std; 12 | 13 | vector find(istream & is, encoding_validator const & enc, size_t const minimum_match_length) 14 | { 15 | 16 | vector out; 17 | 18 | // work buffer - input file is read/procesed in chunks of buffer_size bytes 19 | uptr buffer(new byte_t[buffer_size]); 20 | byte_t * p_buffer = buffer.get(); 21 | 22 | /* 23 | Pointer to the end of the data within the buffer 24 | This will be (buffer + buffer_size) most of the time except for the 25 | final read of the stream where there will likely be less than a full 26 | buffer worth of data 27 | */ 28 | byte_t * p_buffer_end {nullptr}; 29 | 30 | // the current byte(s) we are examining within the buffer 31 | byte_t * p_curr {nullptr}; 32 | 33 | /* 34 | Due to the variable length nature of bytes, we set a "boundary" 35 | near the end of the buffer, at: 36 | (p_buffer_end - enc_validator.max_sequence_length) 37 | This prevents false negatives when a multibyte character is split 38 | across buffer reads. 39 | Once the read pointer is past the boundary, the remaining, 40 | unprocessed bytes are moved to the front of the buffer 41 | and the buffer is filled with the next read from the stream 42 | less the bytes moved to the front. 
43 | */ 44 | byte_t * p_boundary {nullptr}; 45 | 46 | // Track our location within the source data 47 | size_t source_offset {0}; 48 | 49 | /* 50 | The address of the start of a valid string in terms of 51 | the *source data* rather than the buffer 52 | */ 53 | size_t valid_start_offset {0}; 54 | 55 | /* 56 | Start and end pointers for a string of valid data with 57 | the buffer 58 | */ 59 | byte_t * p_valid_start {nullptr}; 60 | byte_t * p_valid_end {nullptr}; 61 | 62 | /* 63 | If p_valid_start is set and we have not yet determined the 64 | end of the string, but we have reached the boundary and 65 | are reloading the buffer, the valid data found so far needs 66 | to be perserved in a secondary buffer. In this case, we 67 | go ahead and allocate a new found_string with the data so 68 | far and set the flag below, which will indicate that the 69 | remaining data should be appended to the last found_string 70 | in the list. 71 | */ 72 | bool crossed_boundary {false}; 73 | 74 | size_t valid_string_length {0}; 75 | size_t advance_read_pointer_count {0}; 76 | int remaining_count {0}; 77 | 78 | while (is.good()) 79 | { 80 | // prepare buffer 81 | if (p_valid_start != nullptr) 82 | { 83 | // we have a valid string in progress 84 | out.push_back({valid_start_offset, {p_valid_start, p_curr}}); 85 | crossed_boundary = true; 86 | } 87 | 88 | // always clear the buffer since we don't know how many bytes 89 | // will actually be read 90 | memset((char *) (p_buffer + remaining_count), 0, (buffer_size - remaining_count)); 91 | is.read((char *) (p_buffer + remaining_count), (buffer_size - remaining_count)); 92 | streamsize buffer_read_size = is.gcount() + remaining_count; 93 | 94 | p_buffer_end = p_buffer + buffer_read_size; 95 | p_curr = p_buffer; 96 | 97 | /* 98 | if we're on the final buffer read (is.eof()) or if the data we read is smaller 99 | than the max sequence length of the encoding, then don't bother setting a boundary 100 | */ 101 | if ((buffer_read_size < 
enc.max_seq_len()) || is.eof()) 102 | p_boundary = p_buffer + buffer_read_size; 103 | else 104 | p_boundary = p_buffer + (buffer_read_size - enc.max_seq_len()); 105 | 106 | while (p_curr < p_boundary) 107 | { 108 | auto valid_byte_count {enc(p_curr)}; 109 | if (valid_byte_count > 0) 110 | { 111 | // found a valid sequence 112 | if (p_valid_start == nullptr) 113 | { 114 | // starting a new valid string 115 | p_valid_start = p_curr; 116 | valid_start_offset = source_offset; 117 | } 118 | 119 | ++valid_string_length; 120 | // move to the next byte after this valid grouping 121 | advance_read_pointer_count = valid_byte_count; 122 | } 123 | else 124 | { 125 | // hit an invalid byte 126 | if (p_valid_start != nullptr) 127 | { 128 | if (valid_string_length >= minimum_match_length) 129 | { 130 | // valid_start was set, so we're finishing a valid string 131 | p_valid_end = p_curr; 132 | if (crossed_boundary) 133 | { 134 | out.back().second.insert(out.back().second.end(), p_buffer, p_valid_end); 135 | } 136 | else 137 | { 138 | // clang-format off 139 | out.push_back ({ 140 | valid_start_offset, 141 | {p_valid_start, p_valid_end} 142 | }); 143 | // clang-format on 144 | } 145 | 146 | if (out.size() >= out.max_size()) 147 | return out; 148 | } 149 | 150 | if (crossed_boundary) 151 | crossed_boundary = false; 152 | 153 | p_valid_start = nullptr; 154 | p_valid_end = nullptr; 155 | } 156 | 157 | valid_string_length = 0; 158 | 159 | // move to next byte as this one was invalid 160 | advance_read_pointer_count = 1; 161 | } 162 | 163 | // increase pointer and offset 164 | p_curr += advance_read_pointer_count; 165 | source_offset += advance_read_pointer_count; 166 | } 167 | 168 | remaining_count = p_buffer_end - p_curr; 169 | if (remaining_count > 0) 170 | copy(p_curr, p_buffer_end, p_buffer); 171 | } 172 | 173 | // finished processing all input 174 | // check if we have an unfinished valid string 175 | if ((p_valid_start != nullptr) && (valid_string_length >= minimum_match_length)) 
176 | { 177 | // clang-format off 178 | out.push_back ({ 179 | valid_start_offset, 180 | {p_valid_start, p_curr} 181 | }); 182 | // clang-format on 183 | } 184 | 185 | return out; 186 | } 187 | } // namespace motoi -------------------------------------------------------------------------------- /src/main.cpp: -------------------------------------------------------------------------------- 1 | #include "enc_cp932.hpp" 2 | #include "enc_eucjp.hpp" 3 | #include "enc_shiftjis.hpp" 4 | #include "fstreams.hpp" 5 | #include "jstrings.hpp" 6 | #include "uniconv.hpp" 7 | #include "usage.hpp" 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #ifdef DEBUG 18 | #include 19 | #endif 20 | 21 | using namespace std; 22 | using namespace motoi; 23 | 24 | static char constexpr CUTOFF_INDICATOR[] {"..."}; 25 | 26 | struct runtime_config_jstrings 27 | { 28 | public: 29 | string input_path; 30 | string encoding {"shiftjis"}; 31 | size_t match_length {default_match_length}; 32 | size_t cutoff {0}; 33 | bool multiline {false}; 34 | bool raw {false}; 35 | bool skip_jis0201 {false}; 36 | } cfg; 37 | 38 | void process_args(int argc, char ** argv); 39 | 40 | int main(int argc, char ** argv) 41 | { 42 | #ifdef DEBUG 43 | chrono::high_resolution_clock::time_point t1 = chrono::high_resolution_clock::now(); 44 | #endif 45 | istream * indata {nullptr}; 46 | ifstream infile; 47 | 48 | shiftjis_validator shiftjis_valid; 49 | cp932_validator cp932_valid; 50 | eucjp_validator eucjp_valid; 51 | 52 | // clang-format off 53 | unordered_map validators 54 | { 55 | {"shiftjis", &shiftjis_valid}, {"shift-jis", &shiftjis_valid}, {"sjis", &shiftjis_valid}, 56 | {"cp932", &cp932_valid}, {"windows932", &cp932_valid}, {"windows31j", &cp932_valid}, 57 | {"eucjp", &cp932_valid}, {"euc-jp", &cp932_valid} 58 | }; 59 | // clang-format on 60 | 61 | if (validators.count(cfg.encoding) == 0) 62 | { 63 | throw invalid_argument("Invalid encoding 
specified"); 64 | } 65 | 66 | try 67 | { 68 | process_args(argc, argv); 69 | 70 | if (cfg.input_path.empty()) 71 | indata = &cin; 72 | else 73 | { 74 | try 75 | { 76 | infile = ifstream_checked(cfg.input_path); 77 | } 78 | catch (system_error const & sys_ex) 79 | { 80 | cerr << sys_ex.what() << ": " << sys_ex.code().message() << endl; 81 | return -1; 82 | } 83 | indata = &infile; 84 | } 85 | 86 | #ifdef DEBUG 87 | chrono::high_resolution_clock::time_point t2 = chrono::high_resolution_clock::now(); 88 | auto duration = chrono::duration_cast(t2 - t1).count(); 89 | 90 | cerr << "SETUP: " << duration << "ms" << endl; 91 | #endif 92 | 93 | #ifdef DEBUG 94 | t1 = chrono::high_resolution_clock::now(); 95 | #endif 96 | 97 | jis_validator * validator {validators[cfg.encoding]}; 98 | validator->include_crlf(cfg.multiline); 99 | validator->skip_jis0201(cfg.skip_jis0201); 100 | uniconv conv(validator->iconv_code()); 101 | 102 | auto found_strings = find(*indata, *validator, cfg.match_length); 103 | #ifdef DEBUG 104 | t2 = chrono::high_resolution_clock::now(); 105 | duration = chrono::duration_cast(t2 - t1).count(); 106 | 107 | cerr << "VALIDATION & COPY: " << duration << "ms" << endl; 108 | #endif 109 | 110 | #ifdef DEBUG 111 | t1 = chrono::high_resolution_clock::now(); 112 | #endif 113 | cout << showbase << internal << setfill('0') << hex; 114 | 115 | size_t counter = 0; 116 | string as_utf8; 117 | for (auto & this_string : found_strings) 118 | { 119 | cout << setw(10) << this_string.first << ' '; 120 | if (cfg.raw) 121 | { 122 | copy(this_string.second.data(), 123 | this_string.second.data() + this_string.second.size(), 124 | ostream_iterator(cout)); 125 | } 126 | else 127 | { 128 | as_utf8 = conv.convert(this_string.second); 129 | if (cfg.cutoff != 0 && as_utf8.size() > cfg.cutoff) 130 | cout << string({as_utf8.begin(), as_utf8.begin() + cfg.cutoff}) << CUTOFF_INDICATOR; 131 | else 132 | cout << as_utf8; 133 | } 134 | cout << endl; 135 | ++counter; 136 | } 137 | #ifdef 
DEBUG 138 | t2 = chrono::high_resolution_clock::now(); 139 | duration = chrono::duration_cast(t2 - t1).count(); 140 | 141 | cerr << "OUTPUT: " << duration << "ms" << endl; 142 | #endif 143 | } 144 | catch (exception const & e) 145 | { 146 | cout << "Uncaught Exception: " << e.what() << endl; 147 | } 148 | } 149 | 150 | void process_args(int argc, char ** argv) 151 | { 152 | // clang-format off 153 | string const short_opts {":l:c:e:mrsh"}; 154 | vector