├── .clang-format
├── .clangd
├── .editorconfig
├── .gitignore
├── CMakeLists.txt
├── Doxyfile
├── LICENSE
├── README.md
├── docs
└── html
│ ├── annotated.html
│ ├── bc_s.png
│ ├── bdwn.png
│ ├── classes.html
│ ├── classjis__enc-members.html
│ ├── classjis__enc.html
│ ├── classjis__enc.png
│ ├── classshift__jis-members.html
│ ├── classshift__jis.html
│ ├── classshift__jis.png
│ ├── closed.png
│ ├── dir_68267d1309a1af8e8297ef4c3efbcdba.html
│ ├── dir_d44c64559bbebec7f509842c48db8b23.html
│ ├── doc.png
│ ├── doxygen.css
│ ├── doxygen.png
│ ├── dynsections.js
│ ├── files.html
│ ├── folderclosed.png
│ ├── folderopen.png
│ ├── functions.html
│ ├── functions_func.html
│ ├── functions_vars.html
│ ├── hierarchy.html
│ ├── index.html
│ ├── jis__enc_8h_source.html
│ ├── jquery.js
│ ├── main_8h_source.html
│ ├── menu.js
│ ├── menudata.js
│ ├── nav_f.png
│ ├── nav_g.png
│ ├── nav_h.png
│ ├── open.png
│ ├── search
│ ├── all_0.html
│ ├── all_0.js
│ ├── all_1.html
│ ├── all_1.js
│ ├── all_2.html
│ ├── all_2.js
│ ├── all_3.html
│ ├── all_3.js
│ ├── all_4.html
│ ├── all_4.js
│ ├── all_5.html
│ ├── all_5.js
│ ├── all_6.html
│ ├── all_6.js
│ ├── all_7.html
│ ├── all_7.js
│ ├── all_8.html
│ ├── all_8.js
│ ├── classes_0.html
│ ├── classes_0.js
│ ├── classes_1.html
│ ├── classes_1.js
│ ├── classes_2.html
│ ├── classes_2.js
│ ├── close.png
│ ├── functions_0.html
│ ├── functions_0.js
│ ├── functions_1.html
│ ├── functions_1.js
│ ├── functions_2.html
│ ├── functions_2.js
│ ├── functions_3.html
│ ├── functions_3.js
│ ├── mag_sel.png
│ ├── nomatches.html
│ ├── pages_0.html
│ ├── pages_0.js
│ ├── search.css
│ ├── search.js
│ ├── search_l.png
│ ├── search_m.png
│ ├── search_r.png
│ ├── searchdata.js
│ ├── variables_0.html
│ ├── variables_0.js
│ ├── variables_1.html
│ ├── variables_1.js
│ ├── variables_2.html
│ ├── variables_2.js
│ ├── variables_3.html
│ ├── variables_3.js
│ ├── variables_4.html
│ └── variables_4.js
│ ├── shift__jis_8h_source.html
│ ├── splitbar.png
│ ├── structfound__string-members.html
│ ├── structfound__string.html
│ ├── sync_off.png
│ ├── sync_on.png
│ ├── tab_a.png
│ ├── tab_b.png
│ ├── tab_h.png
│ ├── tab_s.png
│ └── tabs.css
├── inc
├── enc_cp932.hpp
├── enc_eucjp.hpp
├── enc_shiftjis.hpp
├── enc_validator.hpp
├── filesys.hpp
├── fstreams.hpp
├── iconv_encoding.hpp
├── jis_validator.hpp
├── jstrings.hpp
├── types.hpp
├── uniconv.hpp
└── usage.hpp
└── src
├── app.hpp
├── app.hpp.cfg
├── enc_cp932.cpp
├── enc_eucjp.cpp
├── enc_shiftjis.cpp
├── jstrings.cpp
├── main.cpp
└── usage.cpp
/.clang-format:
--------------------------------------------------------------------------------
1 | Language: Cpp
2 | BasedOnStyle: LLVM
3 |
4 | AlignAfterOpenBracket: DontAlign
5 | AlignEscapedNewlines: DontAlign
6 | AlignOperands: Align
7 | AlignTrailingComments: true
8 | AllowAllArgumentsOnNextLine: true
9 | AllowAllConstructorInitializersOnNextLine: true
10 | AlignArrayOfStructures: None
11 | AllowShortBlocksOnASingleLine: false
12 | AllowShortCaseLabelsOnASingleLine: false
13 | AllowShortFunctionsOnASingleLine: Empty
14 | AllowShortLoopsOnASingleLine: false
15 | AllowShortIfStatementsOnASingleLine: Never
16 | AlwaysBreakBeforeMultilineStrings: true
17 | AlwaysBreakTemplateDeclarations: Yes
18 | BreakBeforeBraces: Allman
19 | BreakBeforeTernaryOperators: true
20 | BreakConstructorInitializers: AfterColon
21 | BinPackArguments: false
22 | BinPackParameters: false
23 | ColumnLimit: 120
24 | ContinuationIndentWidth: 2
25 | Cpp11BracedListStyle: true
26 | Language: Cpp
27 | MaxEmptyLinesToKeep: 1
28 | IndentCaseLabels: true
29 | PackConstructorInitializers: Never
30 | PointerAlignment: Middle
31 | ReferenceAlignment: Pointer
32 | SortIncludes: true
33 | SpaceAfterCStyleCast: true
34 | SpaceAfterLogicalNot: true
35 | SpaceAroundPointerQualifiers: Both
36 | SpaceBeforeAssignmentOperators: true
37 | SpaceBeforeCpp11BracedList: true
38 | SpaceBeforeCtorInitializerColon: true
39 | SpaceBeforeInheritanceColon: true
40 | SpaceBeforeRangeBasedForLoopColon: true
41 | SpaceBeforeParens: ControlStatements
42 | SpacesInAngles: false
43 | SpacesInCStyleCastParentheses: false
44 | SpacesInParentheses: false
45 | SpacesInSquareBrackets: false
46 | Standard: Cpp11
47 | TabWidth: 2
48 | UseTab: Always
49 |
--------------------------------------------------------------------------------
/.clangd:
--------------------------------------------------------------------------------
1 | CompileFlags:
2 | # Treat code as C++, use C++17 standard, enable more warnings.
3 | Add: [-xc++, -std=c++17, -Wall]
4 | Diagnostics:
5 | ClangTidy:
6 | Add: [performance*, modernize*]
7 | Remove: [modernize-use-trailing-return-type, modernize-avoid-c-arrays]
8 | CheckOptions:
9 | readability-identifier-naming.VariableCase: SnakeCase
10 |
11 |
--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
1 | [*]
2 | end_of_line = lf
3 | insert_final_newline = true
4 | charset = utf-8
5 | trim_trailing_whitespace = true
6 |
7 | [*.{c,h,cpp,hpp}]
8 | indent_style = tab
9 | indent_size = 2
10 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .cache/
2 | .vscode
3 | *.code-workspace
4 | bin
5 | build
6 | etc
7 |
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # cmake_minimum_required() must be the first command so policies are set before any module is included
2 | cmake_minimum_required(VERSION 3.5)
3 |
4 | # define project
5 | project(jstrings VERSION 1.6 LANGUAGES CXX)
6 | include(CheckIncludeFiles)
7 |
8 | set(PROJECT_CONTACT "Damian R (damian@motoi.pro)")
9 | set(PROJECT_WEBSITE "https://github.com/drojaazu")
10 | set(PROJECT_COPYRIGHT "©2018 Motoi Productions / Released under MIT License")
11 | set(PROJECT_BRIEF "A tool for finding JIS-based Japanese text in binary data.")
12 |
13 | configure_file("${CMAKE_CURRENT_SOURCE_DIR}/src/app.hpp.cfg" "${CMAKE_CURRENT_SOURCE_DIR}/src/app.hpp" ESCAPE_QUOTES)
14 |
15 | set(CMAKE_CXX_STANDARD 17)
16 | set(CMAKE_CXX_COMPILER_NAMES clang++ g++ icpc c++ cxx)
17 | set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DDEBUG")
18 |
19 | if(NOT EXISTS ${CMAKE_BINARY_DIR}/CMakeCache.txt)
20 | if(NOT CMAKE_BUILD_TYPE)
21 | set(CMAKE_BUILD_TYPE "Release" CACHE STRING "" FORCE)
22 | endif()
23 | endif()
24 |
25 | # define target
26 | aux_source_directory("${CMAKE_CURRENT_SOURCE_DIR}/src" SRCFILES)
27 | include_directories("${CMAKE_CURRENT_SOURCE_DIR}/inc")
28 |
29 | add_executable(${PROJECT_NAME} ${SRCFILES})
30 |
31 | target_compile_features(${PROJECT_NAME} PUBLIC cxx_std_17)
32 |
33 | find_library(ICONV_LIB iconv)
34 | # abort configuration early if libiconv cannot be located
35 | if(NOT ICONV_LIB)
36 | message(FATAL_ERROR "libiconv not found")
37 | endif()
38 | # link the exact library that find_library() located instead of a bare name the linker must resolve again
39 | target_link_libraries(${PROJECT_NAME} ${ICONV_LIB})
40 |
41 | install(TARGETS jstrings
42 | RUNTIME DESTINATION bin)
43 |
--------------------------------------------------------------------------------
/Doxyfile:
--------------------------------------------------------------------------------
1 | # Doxyfile 1.8.13
2 | DOXYFILE_ENCODING = UTF-8
3 | PROJECT_NAME = jstrings
4 | PROJECT_NUMBER = 1
5 | PROJECT_BRIEF = "A tool for finding JIS-based character strings in binary streams"
6 | OUTPUT_DIRECTORY = docs
7 | TAB_SIZE = 2
8 | USE_MDFILE_AS_MAINPAGE = README.md
9 | GENERATE_LATEX = NO
10 | GENERATE_HTML = YES
11 | # Sources live in src/ and inc/; headers use the .hpp extension.
12 | INPUT = README.md src inc
13 | FILE_PATTERNS = *.cpp *.h *.hpp
14 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018-2019 Damian Rogers
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # jstrings
2 |
3 | A tool for finding JIS encoded Japanese text in binary data.
4 |
5 | ## Usage
6 |
7 | jstrings [options] [input_file]
8 |
9 | Input can be a filename or data from stdin. Output is sent to stdout.
10 |
11 | ### Options
12 |
13 | -e encoding
14 | --encoding encoding
15 |
16 | Specify the encoding to use. Use one of the strings listed in parentheses below for that encoding:
17 |
18 | * Shift-JIS (shift-jis, shiftjis, sjis)
19 | * EUC-JP (euc, euc-jp, eucjp)
20 | * Microsoft CP932 (cp932, windows932, windows31j)
21 |
22 | Optional; default is Shift-JIS.
23 |
24 | -l value
25 | --match-length value
26 |
27 | Set number of consecutive characters required to be considered a valid string.
28 |
29 | Optional; default is 5.
30 |
31 | -c value
32 | --cutoff value
33 |
34 | Limit the output for each string to the specified length. This is useful for "previewing" a file which may have large blocks of junk data that happen to fall within the range of valid code points. Strings that are cut off will have an ellipsis appended.
35 |
36 | Note that the length is in bytes, not characters. As such, due to the variable width nature of UTF-8, there is a chance the final character displayed may be incorrect. STL string functions do not work natively with encodings and the author feels that the work needed to implement this for an optional feature that should only be used for quickly previewing data would be overly complex.
37 |
38 | Optional; default is no cutoff.
39 |
40 | -m
41 | --multiline
42 |
43 | Include newline characters (0x0D or 0x0D0A) as valid. Otherwise, these will count as end of string markers.
44 |
45 | Optional; default is disabled.
46 |
47 | -r
48 | --raw
49 |
50 | Output the data in its original encoding without converting to Unicode.
51 |
52 | Optional; default is disabled (will convert output strings to UTF-8 using libiconv).
53 |
54 | -s
55 | --skip-jis0201
56 |
57 | Skip checking for JIS X 0201 characters. This is an 8-bit katakana-only code space that acts as a supplement to ASCII and was generally only used in older (early to mid 1980s) home computers. Skipping this check can reduce false positives if you are working with newer data.
58 |
59 | Optional; default is disabled (will include JIS X 0201 code points as valid matches).
60 |
61 | ## Output
62 |
63 | Found strings are prefixed with the offset at which they were found in the original data and sent to stdout. Strings are converted to UTF-8 using libiconv. The original encoding can be preserved by using the `--raw` option.
64 |
65 | ## Building
66 |
67 | CMake is used for the build system. From the root directory:
68 |
69 | mkdir build && cd build
70 | cmake ..
71 | make
72 | sudo make install
73 |
--------------------------------------------------------------------------------
/docs/html/annotated.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 | jstrings: Class List
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 | jstrings
25 | 1
26 |
27 | A tool for finding JIS-based character strings in binary streams
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
38 |
39 |
40 |
46 |
47 |
48 |
49 |
53 |
54 |
55 |
56 |
57 |
60 |
61 |
62 |
66 |
67 |
Here are the classes, structs, unions and interfaces with brief descriptions:
68 |
69 | C found_string POD structure for containing a found string
70 | C jis_enc Abstract class for JIS based encoding classes
71 | C shift_jis Find Shift-JIS encoded strings in a byte stream
72 |
73 |
74 |
75 |
76 |
81 |
82 |
83 |
--------------------------------------------------------------------------------
/docs/html/bc_s.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/drojaazu/jstrings/bdeef30593790062d99d8b64d0b80e7b69f0b08b/docs/html/bc_s.png
--------------------------------------------------------------------------------
/docs/html/bdwn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/drojaazu/jstrings/bdeef30593790062d99d8b64d0b80e7b69f0b08b/docs/html/bdwn.png
--------------------------------------------------------------------------------
/docs/html/classes.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 | jstrings: Class Index
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 | jstrings
25 | 1
26 |
27 | A tool for finding JIS-based character strings in binary streams
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
38 |
39 |
40 |
46 |
47 |
48 |
49 |
53 |
54 |
55 |
56 |
57 |
60 |
61 |
62 |
66 |
79 |
80 |
85 |
86 |
87 |
--------------------------------------------------------------------------------
/docs/html/classjis__enc-members.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 | jstrings: Member List
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 | jstrings
25 | 1
26 |
27 | A tool for finding JIS-based character strings in binary streams
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
38 |
39 |
40 |
46 |
47 |
48 |
52 |
53 |
54 |
55 |
56 |
59 |
60 |
61 |
62 |
66 |
67 |
68 |
This is the complete list of members for jis_enc , including all inherited members.
69 |
84 |
85 |
90 |
91 |
92 |
--------------------------------------------------------------------------------
/docs/html/classjis__enc.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 | jstrings: jis_enc Class Reference
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 | jstrings
25 | 1
26 |
27 | A tool for finding JIS-based character strings in binary streams
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
38 |
39 |
40 |
46 |
47 |
48 |
52 |
53 |
54 |
55 |
56 |
59 |
60 |
61 |
62 |
70 |
71 |
72 |
Abstract class for JIS based encoding classes.
73 | More...
74 |
75 |
#include <jis_enc.h >
76 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
120 |
122 |
123 | std::istream * instream
124 |
125 |
126 | size_t min_len = 10
127 | Minimum number of characters to match to count as a found string.
128 |
129 |
130 | bool is_big_endian = true
131 | Determines byte order for multibyte characters.
132 |
133 |
134 | bool use_jisx0213 = false
135 | Use the JIS X 0213 character set instead of JIS X 0208.
136 |
137 |
138 |
139 |
Abstract class for JIS based encoding classes.
140 |
The documentation for this class was generated from the following files:
141 | include/jis_enc.h
142 | src/jis_enc.cpp
143 |
144 |
145 |
146 |
151 |
152 |
153 |
--------------------------------------------------------------------------------
/docs/html/classjis__enc.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/drojaazu/jstrings/bdeef30593790062d99d8b64d0b80e7b69f0b08b/docs/html/classjis__enc.png
--------------------------------------------------------------------------------
/docs/html/classshift__jis-members.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 | jstrings: Member List
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 | jstrings
25 | 1
26 |
27 | A tool for finding JIS-based character strings in binary streams
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
38 |
39 |
40 |
46 |
47 |
48 |
52 |
53 |
54 |
55 |
56 |
59 |
60 |
61 |
62 |
66 |
67 |
68 |
This is the complete list of members for shift_jis , including all inherited members.
69 |
89 |
90 |
95 |
96 |
97 |
--------------------------------------------------------------------------------
/docs/html/classshift__jis.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/drojaazu/jstrings/bdeef30593790062d99d8b64d0b80e7b69f0b08b/docs/html/classshift__jis.png
--------------------------------------------------------------------------------
/docs/html/closed.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/drojaazu/jstrings/bdeef30593790062d99d8b64d0b80e7b69f0b08b/docs/html/closed.png
--------------------------------------------------------------------------------
/docs/html/dir_68267d1309a1af8e8297ef4c3efbcdba.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 | jstrings: src Directory Reference
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 | jstrings
25 | 1
26 |
27 | A tool for finding JIS-based character strings in binary streams
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
38 |
39 |
40 |
46 |
47 |
48 |
52 |
53 |
54 |
55 |
56 |
59 |
60 |
61 |
65 |
66 |
70 |
71 |
72 |
73 |
78 |
79 |
80 |
--------------------------------------------------------------------------------
/docs/html/dir_d44c64559bbebec7f509842c48db8b23.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 | jstrings: include Directory Reference
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 | jstrings
25 | 1
26 |
27 | A tool for finding JIS-based character strings in binary streams
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
38 |
39 |
40 |
46 |
47 |
48 |
52 |
53 |
54 |
55 |
56 |
59 |
60 |
61 |
65 |
66 |
70 |
71 |
72 |
73 |
78 |
79 |
80 |
--------------------------------------------------------------------------------
/docs/html/doc.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/drojaazu/jstrings/bdeef30593790062d99d8b64d0b80e7b69f0b08b/docs/html/doc.png
--------------------------------------------------------------------------------
/docs/html/doxygen.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/drojaazu/jstrings/bdeef30593790062d99d8b64d0b80e7b69f0b08b/docs/html/doxygen.png
--------------------------------------------------------------------------------
/docs/html/dynsections.js:
--------------------------------------------------------------------------------
1 | function toggleVisibility(linkObj) // Toggle between the summary and content views tied to linkObj; always returns false.
2 | {
3 | var base = $(linkObj).attr('id'); // id of linkObj, used as the prefix for the related element ids below
4 | var summary = $('#'+base+'-summary');
5 | var content = $('#'+base+'-content');
6 | var trigger = $('#'+base+'-trigger');
7 | var src=$(trigger).attr('src'); // current 'src' attribute of the trigger element
8 | if (content.is(':visible')===true) { // content currently shown: switch to the summary
9 | content.hide();
10 | summary.show();
11 | $(linkObj).addClass('closed').removeClass('opened');
12 | $(trigger).attr('src',src.substring(0,src.length-8)+'closed.png'); // replace the last 8 chars (presumably 'open.png') with 'closed.png'
13 | } else { // content currently hidden: switch to the content
14 | content.show();
15 | summary.hide();
16 | $(linkObj).removeClass('closed').addClass('opened');
17 | $(trigger).attr('src',src.substring(0,src.length-10)+'open.png'); // replace the last 10 chars (presumably 'closed.png') with 'open.png'
18 | }
19 | return false; // NOTE(review): presumably suppresses the default link action; confirm against the caller
20 | }
21 |
22 | function updateStripes() // Recompute the 'even' class on the rows of table.directory.
23 | {
24 | $('table.directory tr'). // start from every row of the directory table
25 | removeClass('even').filter(':visible:even').addClass('even'); // clear 'even' everywhere, then re-add it to the even-indexed visible rows
26 | }
27 |
28 | function toggleLevel(level)
29 | {
30 | $('table.directory tr').each(function() {
31 | var l = this.id.split('_').length-1;
32 | var i = $('#img'+this.id.substring(3));
33 | var a = $('#arr'+this.id.substring(3));
34 | if (l
2 |
3 |
4 |
5 |
6 |
7 |
8 | jstrings: File List
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 | jstrings
25 | 1
26 |
27 | A tool for finding JIS-based character strings in binary streams
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
38 |
39 |
40 |
46 |
47 |
48 |
49 |
53 |
54 |
55 |
56 |
57 |
60 |
61 |
62 |
66 |
67 |
Here is a list of all documented files with brief descriptions:
68 |
[detail level 1 2 ]
69 | ▼ include
70 | jis_enc.h
71 | main.h
72 | shift_jis.h
73 |
74 |
75 |
76 |
77 |
82 |
83 |
84 |
--------------------------------------------------------------------------------
/docs/html/folderclosed.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/drojaazu/jstrings/bdeef30593790062d99d8b64d0b80e7b69f0b08b/docs/html/folderclosed.png
--------------------------------------------------------------------------------
/docs/html/folderopen.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/drojaazu/jstrings/bdeef30593790062d99d8b64d0b80e7b69f0b08b/docs/html/folderopen.png
--------------------------------------------------------------------------------
/docs/html/functions.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 | jstrings: Class Members
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 | jstrings
25 | 1
26 |
27 | A tool for finding JIS-based character strings in binary streams
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
38 |
39 |
40 |
46 |
47 |
48 |
49 |
53 |
54 |
55 |
56 |
57 |
60 |
61 |
62 |
63 |
Here is a list of all documented class members with links to the class documentation for each member:
111 |
112 |
113 |
118 |
119 |
120 |
--------------------------------------------------------------------------------
/docs/html/functions_func.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 | jstrings: Class Members - Functions
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 | jstrings
25 | 1
26 |
27 | A tool for finding JIS-based character strings in binary streams
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
38 |
39 |
40 |
46 |
47 |
48 |
49 |
53 |
54 |
55 |
56 |
57 |
60 |
61 |
62 |
63 |
64 | find()
65 | : jis_enc
66 | , shift_jis
67 |
68 | get_is_big_endian()
69 | : jis_enc
70 |
71 | get_min_len()
72 | : jis_enc
73 |
74 | get_use_jisx0213()
75 | : jis_enc
76 |
77 | is_jisx0201_printable()
78 | : shift_jis
79 |
80 | is_jisx0208()
81 | : shift_jis
82 |
83 | is_jisx0213()
84 | : shift_jis
85 |
86 | set_is_big_endian()
87 | : jis_enc
88 |
89 | set_min_len()
90 | : jis_enc
91 |
92 | set_use_jisx0213()
93 | : jis_enc
94 |
95 |
96 |
97 |
98 |
103 |
104 |
105 |
--------------------------------------------------------------------------------
/docs/html/functions_vars.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 | jstrings: Class Members - Variables
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 | jstrings
25 | 1
26 |
27 | A tool for finding JIS-based character strings in binary streams
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
38 |
39 |
40 |
46 |
47 |
48 |
49 |
53 |
54 |
55 |
56 |
57 |
60 |
61 |
62 |
81 |
82 |
87 |
88 |
89 |
--------------------------------------------------------------------------------
/docs/html/hierarchy.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 | jstrings: Class Hierarchy
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 | jstrings
25 | 1
26 |
27 | A tool for finding JIS-based character strings in binary streams
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
38 |
39 |
40 |
46 |
47 |
48 |
49 |
53 |
54 |
55 |
56 |
57 |
60 |
61 |
62 |
66 |
67 |
This inheritance list is sorted roughly, but not completely, alphabetically:
68 |
[detail level 1 2 ]
69 | C found_string POD structure for containing a found string
70 | ▼ C jis_enc Abstract class for JIS based encoding classes
71 | C shift_jis Find Shift-JIS encoded strings in a byte stream
72 |
73 |
74 |
75 |
76 |
81 |
82 |
83 |
--------------------------------------------------------------------------------
/docs/html/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 | jstrings: jstrings
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 | jstrings
25 | 1
26 |
27 | A tool for finding JIS-based character strings in binary streams
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
38 |
39 |
40 |
46 |
47 |
48 |
49 |
53 |
54 |
55 |
56 |
57 |
60 |
61 |
62 |
66 |
67 |
A tool for finding JIS-based Japanese characters in binary data.
68 |
Usage
69 |
jstrings [options] [input]
70 |
Input can be a filename or data from stdin.
71 |
Options
72 |
-m number
73 |
Set minimum number of characters to match as a valid string. Default: 10.
74 |
-e encoding
75 |
Specify the encoding to use. Currently, the only valid value is "shift-jis". Default: shift-jis
76 |
-l
77 |
Use little-endian order for multibyte characters
78 |
-jisx0213
79 |
Use JIS X 0213 character set instead of JIS X 0208 for double byte characters
80 |
Notes
81 |
Data is output in its original encoding without any conversion. Other tools, such as iconv, can do conversion to something more useful (such as UTF8). For example:
82 |
jstrings file.bin | iconv -f SHIFT-JIS -t UTF-8 -c
83 |
To Do
84 |
85 | Add support for other JIS encodings: CP932, EUC
86 | Add support for JIS X 0212 for non-SJIS encodings (only EUC?)
87 | Add option to only return strings with double-byte characters present
88 |
89 |
90 |
91 |
96 |
97 |
98 |
--------------------------------------------------------------------------------
/docs/html/jis__enc_8h_source.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 | jstrings: include/jis_enc.h Source File
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 | jstrings
25 | 1
26 |
27 | A tool for finding JIS-based character strings in binary streams
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
38 |
39 |
40 |
46 |
47 |
48 |
52 |
53 |
54 |
55 |
56 |
59 |
60 |
61 |
65 |
66 |
70 |
71 |
31 std::istream* instream;
39 bool is_big_endian =
true ;
43 bool use_jisx0213 =
false ;
46 jis_enc (std::istream* instream);
51 virtual std::vector<found_string>* find() = 0;
55 void set_min_len(
size_t min_len);
63 void set_is_big_endian(
bool is_big_endian);
67 bool get_is_big_endian();
71 void set_use_jisx0213(
bool use_jisx0213);
75 bool get_use_jisx0213();
std::vector< uint8_t > data
The extracted string data.
Definition: jis_enc.h:23
72 |
off_t address
The offset of the beginning of the found string relative to the start of the stream.
Definition: jis_enc.h:19
73 |
Abstract class for JIS based encoding classes.
Definition: jis_enc.h:29
74 |
POD structure for containing a found string.
Definition: jis_enc.h:15
75 |
76 |
77 |
82 |
83 |
84 |
--------------------------------------------------------------------------------
/docs/html/main_8h_source.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 | jstrings: include/main.h Source File
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 | jstrings
25 | 1
26 |
27 | A tool for finding JIS-based character strings in binary streams
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
38 |
39 |
40 |
46 |
47 |
48 |
52 |
53 |
54 |
55 |
56 |
59 |
60 |
61 |
65 |
66 |
70 |
71 |
13 enum encodings { shift_jis_enc, cp932_enc, euc_enc };
15 int process_args(
int argc,
char **argv);
72 |
73 |
78 |
79 |
80 |
--------------------------------------------------------------------------------
/docs/html/menu.js:
--------------------------------------------------------------------------------
1 | function initMenu(relPath,searchEnabled,serverSide,searchPage,search) {
2 | function makeTree(data,relPath) {
3 | var result='';
4 | if ('children' in data) {
5 | result+='';
12 | }
13 | return result;
14 | }
15 |
16 | $('#main-nav').append(makeTree(menudata,relPath));
17 | $('#main-nav').children(':first').addClass('sm sm-dox').attr('id','main-menu');
18 | if (searchEnabled) {
19 | if (serverSide) {
20 | $('#main-menu').append(' ');
21 | } else {
22 | $('#main-menu').append(' ');
23 | }
24 | }
25 | $('#main-menu').smartmenus();
26 | }
27 |
--------------------------------------------------------------------------------
/docs/html/menudata.js:
--------------------------------------------------------------------------------
1 | var menudata={children:[
2 | {text:"Main Page",url:"index.html"},
3 | {text:"Classes",url:"annotated.html",children:[
4 | {text:"Class List",url:"annotated.html"},
5 | {text:"Class Index",url:"classes.html"},
6 | {text:"Class Hierarchy",url:"hierarchy.html"},
7 | {text:"Class Members",url:"functions.html",children:[
8 | {text:"All",url:"functions.html"},
9 | {text:"Functions",url:"functions_func.html"},
10 | {text:"Variables",url:"functions_vars.html"}]}]},
11 | {text:"Files",url:"files.html",children:[
12 | {text:"File List",url:"files.html"}]}]}
13 |
--------------------------------------------------------------------------------
/docs/html/nav_f.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/drojaazu/jstrings/bdeef30593790062d99d8b64d0b80e7b69f0b08b/docs/html/nav_f.png
--------------------------------------------------------------------------------
/docs/html/nav_g.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/drojaazu/jstrings/bdeef30593790062d99d8b64d0b80e7b69f0b08b/docs/html/nav_g.png
--------------------------------------------------------------------------------
/docs/html/nav_h.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/drojaazu/jstrings/bdeef30593790062d99d8b64d0b80e7b69f0b08b/docs/html/nav_h.png
--------------------------------------------------------------------------------
/docs/html/open.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/drojaazu/jstrings/bdeef30593790062d99d8b64d0b80e7b69f0b08b/docs/html/open.png
--------------------------------------------------------------------------------
/docs/html/search/all_0.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
Loading...
12 |
13 |
16 |
Searching...
17 |
No Matches
18 |
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/docs/html/search/all_0.js:
--------------------------------------------------------------------------------
1 | var searchData=
2 | [
3 | ['address',['address',['../structfound__string.html#abe22b9db7b758858c8615bcce8c709eb',1,'found_string']]]
4 | ];
5 |
--------------------------------------------------------------------------------
/docs/html/search/all_1.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
Loading...
12 |
13 |
16 |
Searching...
17 |
No Matches
18 |
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/docs/html/search/all_1.js:
--------------------------------------------------------------------------------
1 | var searchData=
2 | [
3 | ['data',['data',['../structfound__string.html#a0c6e7a1688668189558667ac6eafa1bd',1,'found_string']]]
4 | ];
5 |
--------------------------------------------------------------------------------
/docs/html/search/all_2.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
Loading...
12 |
13 |
16 |
Searching...
17 |
No Matches
18 |
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/docs/html/search/all_2.js:
--------------------------------------------------------------------------------
1 | var searchData=
2 | [
3 | ['find',['find',['../classjis__enc.html#ada0cfd52a1ec8ee216138e2a72588a0e',1,'jis_enc::find()'],['../classshift__jis.html#ae0b748a6775bdc79d2166dfde3c6db0a',1,'shift_jis::find()']]],
4 | ['found_5fstring',['found_string',['../structfound__string.html',1,'']]]
5 | ];
6 |
--------------------------------------------------------------------------------
/docs/html/search/all_3.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
Loading...
12 |
13 |
16 |
Searching...
17 |
No Matches
18 |
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/docs/html/search/all_3.js:
--------------------------------------------------------------------------------
1 | var searchData=
2 | [
3 | ['get_5fis_5fbig_5fendian',['get_is_big_endian',['../classjis__enc.html#a074c250e987744a27aa03db866d2cfdb',1,'jis_enc']]],
4 | ['get_5fmin_5flen',['get_min_len',['../classjis__enc.html#ac32e7ab8d1376aca07d17748fd22fb62',1,'jis_enc']]],
5 | ['get_5fuse_5fjisx0213',['get_use_jisx0213',['../classjis__enc.html#afa01565401216a5bef8fc5f7346ac7dc',1,'jis_enc']]]
6 | ];
7 |
--------------------------------------------------------------------------------
/docs/html/search/all_4.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
Loading...
12 |
13 |
16 |
Searching...
17 |
No Matches
18 |
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/docs/html/search/all_4.js:
--------------------------------------------------------------------------------
1 | var searchData=
2 | [
3 | ['is_5fbig_5fendian',['is_big_endian',['../classjis__enc.html#a938547156f97dfd2d525a5205744f4ca',1,'jis_enc']]],
4 | ['is_5fjisx0201_5fprintable',['is_jisx0201_printable',['../classshift__jis.html#abe70f02f23af28e59dde449c3a21be68',1,'shift_jis']]],
5 | ['is_5fjisx0208',['is_jisx0208',['../classshift__jis.html#ab2f235118a565abbf6ca9bd831ffa4ea',1,'shift_jis']]],
6 | ['is_5fjisx0213',['is_jisx0213',['../classshift__jis.html#ae1d0ad63b01a645c1ecf7f69660b8d41',1,'shift_jis']]]
7 | ];
8 |
--------------------------------------------------------------------------------
/docs/html/search/all_5.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
Loading...
12 |
13 |
16 |
Searching...
17 |
No Matches
18 |
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/docs/html/search/all_5.js:
--------------------------------------------------------------------------------
1 | var searchData=
2 | [
3 | ['jstrings',['jstrings',['../index.html',1,'']]],
4 | ['jis_5fenc',['jis_enc',['../classjis__enc.html',1,'']]]
5 | ];
6 |
--------------------------------------------------------------------------------
/docs/html/search/all_6.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
Loading...
12 |
13 |
16 |
Searching...
17 |
No Matches
18 |
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/docs/html/search/all_6.js:
--------------------------------------------------------------------------------
1 | var searchData=
2 | [
3 | ['min_5flen',['min_len',['../classjis__enc.html#a61a76eed4c65b7666d5354f89dfae796',1,'jis_enc']]]
4 | ];
5 |
--------------------------------------------------------------------------------
/docs/html/search/all_7.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
Loading...
12 |
13 |
16 |
Searching...
17 |
No Matches
18 |
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/docs/html/search/all_7.js:
--------------------------------------------------------------------------------
1 | var searchData=
2 | [
3 | ['set_5fis_5fbig_5fendian',['set_is_big_endian',['../classjis__enc.html#a22a855a3769ee68e798d41548c58678b',1,'jis_enc']]],
4 | ['set_5fmin_5flen',['set_min_len',['../classjis__enc.html#a85e34f8f9960d895caf72c616046f20d',1,'jis_enc']]],
5 | ['set_5fuse_5fjisx0213',['set_use_jisx0213',['../classjis__enc.html#ad93d9e0a295fe97494a9b1dec6c9b926',1,'jis_enc']]],
6 | ['shift_5fjis',['shift_jis',['../classshift__jis.html',1,'']]]
7 | ];
8 |
--------------------------------------------------------------------------------
/docs/html/search/all_8.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
Loading...
12 |
13 |
16 |
Searching...
17 |
No Matches
18 |
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/docs/html/search/all_8.js:
--------------------------------------------------------------------------------
1 | var searchData=
2 | [
3 | ['use_5fjisx0213',['use_jisx0213',['../classjis__enc.html#a9dbb086c214f28bf82b9304a3f027d48',1,'jis_enc']]]
4 | ];
5 |
--------------------------------------------------------------------------------
/docs/html/search/classes_0.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
Loading...
12 |
13 |
16 |
Searching...
17 |
No Matches
18 |
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/docs/html/search/classes_0.js:
--------------------------------------------------------------------------------
1 | var searchData=
2 | [
3 | ['found_5fstring',['found_string',['../structfound__string.html',1,'']]]
4 | ];
5 |
--------------------------------------------------------------------------------
/docs/html/search/classes_1.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
Loading...
12 |
13 |
16 |
Searching...
17 |
No Matches
18 |
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/docs/html/search/classes_1.js:
--------------------------------------------------------------------------------
1 | var searchData=
2 | [
3 | ['jis_5fenc',['jis_enc',['../classjis__enc.html',1,'']]]
4 | ];
5 |
--------------------------------------------------------------------------------
/docs/html/search/classes_2.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
Loading...
12 |
13 |
16 |
Searching...
17 |
No Matches
18 |
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/docs/html/search/classes_2.js:
--------------------------------------------------------------------------------
1 | var searchData=
2 | [
3 | ['shift_5fjis',['shift_jis',['../classshift__jis.html',1,'']]]
4 | ];
5 |
--------------------------------------------------------------------------------
/docs/html/search/close.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/drojaazu/jstrings/bdeef30593790062d99d8b64d0b80e7b69f0b08b/docs/html/search/close.png
--------------------------------------------------------------------------------
/docs/html/search/functions_0.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
Loading...
12 |
13 |
16 |
Searching...
17 |
No Matches
18 |
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/docs/html/search/functions_0.js:
--------------------------------------------------------------------------------
1 | var searchData=
2 | [
3 | ['find',['find',['../classjis__enc.html#ada0cfd52a1ec8ee216138e2a72588a0e',1,'jis_enc::find()'],['../classshift__jis.html#ae0b748a6775bdc79d2166dfde3c6db0a',1,'shift_jis::find()']]]
4 | ];
5 |
--------------------------------------------------------------------------------
/docs/html/search/functions_1.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
Loading...
12 |
13 |
16 |
Searching...
17 |
No Matches
18 |
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/docs/html/search/functions_1.js:
--------------------------------------------------------------------------------
1 | var searchData=
2 | [
3 | ['get_5fis_5fbig_5fendian',['get_is_big_endian',['../classjis__enc.html#a074c250e987744a27aa03db866d2cfdb',1,'jis_enc']]],
4 | ['get_5fmin_5flen',['get_min_len',['../classjis__enc.html#ac32e7ab8d1376aca07d17748fd22fb62',1,'jis_enc']]],
5 | ['get_5fuse_5fjisx0213',['get_use_jisx0213',['../classjis__enc.html#afa01565401216a5bef8fc5f7346ac7dc',1,'jis_enc']]]
6 | ];
7 |
--------------------------------------------------------------------------------
/docs/html/search/functions_2.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
Loading...
12 |
13 |
16 |
Searching...
17 |
No Matches
18 |
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/docs/html/search/functions_2.js:
--------------------------------------------------------------------------------
1 | var searchData=
2 | [
3 | ['is_5fjisx0201_5fprintable',['is_jisx0201_printable',['../classshift__jis.html#abe70f02f23af28e59dde449c3a21be68',1,'shift_jis']]],
4 | ['is_5fjisx0208',['is_jisx0208',['../classshift__jis.html#ab2f235118a565abbf6ca9bd831ffa4ea',1,'shift_jis']]],
5 | ['is_5fjisx0213',['is_jisx0213',['../classshift__jis.html#ae1d0ad63b01a645c1ecf7f69660b8d41',1,'shift_jis']]]
6 | ];
7 |
--------------------------------------------------------------------------------
/docs/html/search/functions_3.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
Loading...
12 |
13 |
16 |
Searching...
17 |
No Matches
18 |
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/docs/html/search/functions_3.js:
--------------------------------------------------------------------------------
1 | var searchData=
2 | [
3 | ['set_5fis_5fbig_5fendian',['set_is_big_endian',['../classjis__enc.html#a22a855a3769ee68e798d41548c58678b',1,'jis_enc']]],
4 | ['set_5fmin_5flen',['set_min_len',['../classjis__enc.html#a85e34f8f9960d895caf72c616046f20d',1,'jis_enc']]],
5 | ['set_5fuse_5fjisx0213',['set_use_jisx0213',['../classjis__enc.html#ad93d9e0a295fe97494a9b1dec6c9b926',1,'jis_enc']]]
6 | ];
7 |
--------------------------------------------------------------------------------
/docs/html/search/mag_sel.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/drojaazu/jstrings/bdeef30593790062d99d8b64d0b80e7b69f0b08b/docs/html/search/mag_sel.png
--------------------------------------------------------------------------------
/docs/html/search/nomatches.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/docs/html/search/pages_0.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
Loading...
12 |
13 |
16 |
Searching...
17 |
No Matches
18 |
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/docs/html/search/pages_0.js:
--------------------------------------------------------------------------------
1 | var searchData=
2 | [
3 | ['jstrings',['jstrings',['../index.html',1,'']]]
4 | ];
5 |
--------------------------------------------------------------------------------
/docs/html/search/search.css:
--------------------------------------------------------------------------------
1 | /*---------------- Search Box */
2 |
3 | #FSearchBox {
4 | float: left;
5 | }
6 |
7 | #MSearchBox {
8 | white-space : nowrap;
9 | float: none;
10 | margin-top: 8px;
11 | right: 0px;
12 | width: 170px;
13 | height: 24px;
14 | z-index: 102;
15 | }
16 |
17 | #MSearchBox .left
18 | {
19 | display:block;
20 | position:absolute;
21 | left:10px;
22 | width:20px;
23 | height:19px;
24 | background:url('search_l.png') no-repeat;
25 | background-position:right;
26 | }
27 |
28 | #MSearchSelect {
29 | display:block;
30 | position:absolute;
31 | width:20px;
32 | height:19px;
33 | }
34 |
35 | .left #MSearchSelect {
36 | left:4px;
37 | }
38 |
39 | .right #MSearchSelect {
40 | right:5px;
41 | }
42 |
43 | #MSearchField {
44 | display:block;
45 | position:absolute;
46 | height:19px;
47 | background:url('search_m.png') repeat-x;
48 | border:none;
49 | width:115px;
50 | margin-left:20px;
51 | padding-left:4px;
52 | color: #909090;
53 | outline: none;
54 | font: 9pt Arial, Verdana, sans-serif;
55 | -webkit-border-radius: 0px;
56 | }
57 |
58 | #FSearchBox #MSearchField {
59 | margin-left:15px;
60 | }
61 |
62 | #MSearchBox .right {
63 | display:block;
64 | position:absolute;
65 | right:10px;
66 | top:8px;
67 | width:20px;
68 | height:19px;
69 | background:url('search_r.png') no-repeat;
70 | background-position:left;
71 | }
72 |
73 | #MSearchClose {
74 | display: none;
75 | position: absolute;
76 | top: 4px;
77 | background : none;
78 | border: none;
79 | margin: 0px 4px 0px 0px;
80 | padding: 0px 0px;
81 | outline: none;
82 | }
83 |
84 | .left #MSearchClose {
85 | left: 6px;
86 | }
87 |
88 | .right #MSearchClose {
89 | right: 2px;
90 | }
91 |
92 | .MSearchBoxActive #MSearchField {
93 | color: #000000;
94 | }
95 |
96 | /*---------------- Search filter selection */
97 |
98 | #MSearchSelectWindow {
99 | display: none;
100 | position: absolute;
101 | left: 0; top: 0;
102 | border: 1px solid #90A5CE;
103 | background-color: #F9FAFC;
104 | z-index: 10001;
105 | padding-top: 4px;
106 | padding-bottom: 4px;
107 | -moz-border-radius: 4px;
108 | -webkit-border-top-left-radius: 4px;
109 | -webkit-border-top-right-radius: 4px;
110 | -webkit-border-bottom-left-radius: 4px;
111 | -webkit-border-bottom-right-radius: 4px;
112 | -webkit-box-shadow: 5px 5px 5px rgba(0, 0, 0, 0.15);
113 | }
114 |
115 | .SelectItem {
116 | font: 8pt Arial, Verdana, sans-serif;
117 | padding-left: 2px;
118 | padding-right: 12px;
119 | border: 0px;
120 | }
121 |
122 | span.SelectionMark {
123 | margin-right: 4px;
124 | font-family: monospace;
125 | outline-style: none;
126 | text-decoration: none;
127 | }
128 |
129 | a.SelectItem {
130 | display: block;
131 | outline-style: none;
132 | color: #000000;
133 | text-decoration: none;
134 | padding-left: 6px;
135 | padding-right: 12px;
136 | }
137 |
138 | a.SelectItem:focus,
139 | a.SelectItem:active {
140 | color: #000000;
141 | outline-style: none;
142 | text-decoration: none;
143 | }
144 |
145 | a.SelectItem:hover {
146 | color: #FFFFFF;
147 | background-color: #3D578C;
148 | outline-style: none;
149 | text-decoration: none;
150 | cursor: pointer;
151 | display: block;
152 | }
153 |
154 | /*---------------- Search results window */
155 |
156 | iframe#MSearchResults {
157 | width: 60ex;
158 | height: 15em;
159 | }
160 |
161 | #MSearchResultsWindow {
162 | display: none;
163 | position: absolute;
164 | left: 0; top: 0;
165 | border: 1px solid #000;
166 | background-color: #EEF1F7;
167 | z-index:10000;
168 | }
169 |
170 | /* ----------------------------------- */
171 |
172 |
173 | #SRIndex {
174 | clear:both;
175 | padding-bottom: 15px;
176 | }
177 |
178 | .SREntry {
179 | font-size: 10pt;
180 | padding-left: 1ex;
181 | }
182 |
183 | .SRPage .SREntry {
184 | font-size: 8pt;
185 | padding: 1px 5px;
186 | }
187 |
188 | body.SRPage {
189 | margin: 5px 2px;
190 | }
191 |
192 | .SRChildren {
193 | padding-left: 3ex; padding-bottom: .5em
194 | }
195 |
196 | .SRPage .SRChildren {
197 | display: none;
198 | }
199 |
200 | .SRSymbol {
201 | font-weight: bold;
202 | color: #425E97;
203 | font-family: Arial, Verdana, sans-serif;
204 | text-decoration: none;
205 | outline: none;
206 | }
207 |
208 | a.SRScope {
209 | display: block;
210 | color: #425E97;
211 | font-family: Arial, Verdana, sans-serif;
212 | text-decoration: none;
213 | outline: none;
214 | }
215 |
216 | a.SRSymbol:focus, a.SRSymbol:active,
217 | a.SRScope:focus, a.SRScope:active {
218 | text-decoration: underline;
219 | }
220 |
221 | span.SRScope {
222 | padding-left: 4px;
223 | }
224 |
225 | .SRPage .SRStatus {
226 | padding: 2px 5px;
227 | font-size: 8pt;
228 | font-style: italic;
229 | }
230 |
231 | .SRResult {
232 | display: none;
233 | }
234 |
235 | DIV.searchresults {
236 | margin-left: 10px;
237 | margin-right: 10px;
238 | }
239 |
240 | /*---------------- External search page results */
241 |
242 | .searchresult {
243 | background-color: #F0F3F8;
244 | }
245 |
246 | .pages b {
247 | color: white;
248 | padding: 5px 5px 3px 5px;
249 | background-image: url("../tab_a.png");
250 | background-repeat: repeat-x;
251 | text-shadow: 0 1px 1px #000000;
252 | }
253 |
254 | .pages {
255 | line-height: 17px;
256 | margin-left: 4px;
257 | text-decoration: none;
258 | }
259 |
260 | .hl {
261 | font-weight: bold;
262 | }
263 |
264 | #searchresults {
265 | margin-bottom: 20px;
266 | }
267 |
268 | .searchpages {
269 | margin-top: 10px;
270 | }
271 |
272 |
--------------------------------------------------------------------------------
/docs/html/search/search_l.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/drojaazu/jstrings/bdeef30593790062d99d8b64d0b80e7b69f0b08b/docs/html/search/search_l.png
--------------------------------------------------------------------------------
/docs/html/search/search_m.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/drojaazu/jstrings/bdeef30593790062d99d8b64d0b80e7b69f0b08b/docs/html/search/search_m.png
--------------------------------------------------------------------------------
/docs/html/search/search_r.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/drojaazu/jstrings/bdeef30593790062d99d8b64d0b80e7b69f0b08b/docs/html/search/search_r.png
--------------------------------------------------------------------------------
/docs/html/search/searchdata.js:
--------------------------------------------------------------------------------
1 | var indexSectionsWithContent =
2 | {
3 | 0: "adfgijmsu",
4 | 1: "fjs",
5 | 2: "fgis",
6 | 3: "adimu",
7 | 4: "j"
8 | };
9 |
10 | var indexSectionNames =
11 | {
12 | 0: "all",
13 | 1: "classes",
14 | 2: "functions",
15 | 3: "variables",
16 | 4: "pages"
17 | };
18 |
19 | var indexSectionLabels =
20 | {
21 | 0: "All",
22 | 1: "Classes",
23 | 2: "Functions",
24 | 3: "Variables",
25 | 4: "Pages"
26 | };
27 |
28 |
--------------------------------------------------------------------------------
/docs/html/search/variables_0.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
Loading...
12 |
13 |
16 |
Searching...
17 |
No Matches
18 |
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/docs/html/search/variables_0.js:
--------------------------------------------------------------------------------
1 | var searchData=
2 | [
3 | ['address',['address',['../structfound__string.html#abe22b9db7b758858c8615bcce8c709eb',1,'found_string']]]
4 | ];
5 |
--------------------------------------------------------------------------------
/docs/html/search/variables_1.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
Loading...
12 |
13 |
16 |
Searching...
17 |
No Matches
18 |
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/docs/html/search/variables_1.js:
--------------------------------------------------------------------------------
1 | var searchData=
2 | [
3 | ['data',['data',['../structfound__string.html#a0c6e7a1688668189558667ac6eafa1bd',1,'found_string']]]
4 | ];
5 |
--------------------------------------------------------------------------------
/docs/html/search/variables_2.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
Loading...
12 |
13 |
16 |
Searching...
17 |
No Matches
18 |
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/docs/html/search/variables_2.js:
--------------------------------------------------------------------------------
1 | var searchData=
2 | [
3 | ['is_5fbig_5fendian',['is_big_endian',['../classjis__enc.html#a938547156f97dfd2d525a5205744f4ca',1,'jis_enc']]]
4 | ];
5 |
--------------------------------------------------------------------------------
/docs/html/search/variables_3.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
Loading...
12 |
13 |
16 |
Searching...
17 |
No Matches
18 |
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/docs/html/search/variables_3.js:
--------------------------------------------------------------------------------
1 | var searchData=
2 | [
3 | ['min_5flen',['min_len',['../classjis__enc.html#a61a76eed4c65b7666d5354f89dfae796',1,'jis_enc']]]
4 | ];
5 |
--------------------------------------------------------------------------------
/docs/html/search/variables_4.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
Loading...
12 |
13 |
16 |
Searching...
17 |
No Matches
18 |
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/docs/html/search/variables_4.js:
--------------------------------------------------------------------------------
1 | var searchData=
2 | [
3 | ['use_5fjisx0213',['use_jisx0213',['../classjis__enc.html#a9dbb086c214f28bf82b9304a3f027d48',1,'jis_enc']]]
4 | ];
5 |
--------------------------------------------------------------------------------
/docs/html/shift__jis_8h_source.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 | jstrings: include/shift_jis.h Source File
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 | jstrings
25 | 1
26 |
27 | A tool for finding JIS-based character strings in binary streams
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
38 |
39 |
40 |
46 |
47 |
48 |
52 |
53 |
54 |
55 |
56 |
59 |
60 |
61 |
65 |
66 |
70 |
71 |
16 static bool is_byte2_valid(
const uint8_t* c);
21 std::vector<found_string>*
find ();
33 static bool is_jisx0208 (
const uint8_t* c_h,
const uint8_t* c_l);
38 static bool is_jisx0213 (
const uint8_t* c_h,
const uint8_t* c_l);
static bool is_jisx0201_printable(const uint8_t *c)
Determines if a given byte is valid for JIS X 0201 and is printable.
Definition: shift_jis.cpp:98
72 |
std::vector< found_string > * find()
Performs the search of the data stream.
Definition: shift_jis.cpp:25
73 |
static bool is_jisx0208(const uint8_t *c_h, const uint8_t *c_l)
Determines if the given bytes make up a valid JIS X 0208 character.
Definition: shift_jis.cpp:106
74 |
Abstract class for JIS based encoding classes.
Definition: jis_enc.h:29
75 |
static bool is_jisx0213(const uint8_t *c_h, const uint8_t *c_l)
Determines if the given bytes make up a valid JIS X 0213 character.
Definition: shift_jis.cpp:172
76 |
Find Shift-JIS encoded strings in a byte stream.
Definition: shift_jis.h:15
77 |
78 |
79 |
84 |
85 |
86 |
--------------------------------------------------------------------------------
/docs/html/splitbar.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/drojaazu/jstrings/bdeef30593790062d99d8b64d0b80e7b69f0b08b/docs/html/splitbar.png
--------------------------------------------------------------------------------
/docs/html/structfound__string-members.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 | jstrings: Member List
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 | jstrings
25 | 1
26 |
27 | A tool for finding JIS-based character strings in binary streams
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
38 |
39 |
40 |
46 |
47 |
48 |
52 |
53 |
54 |
55 |
56 |
59 |
60 |
61 |
62 |
66 |
67 |
68 |
This is the complete list of members for found_string , including all inherited members.
69 |
73 |
74 |
79 |
80 |
81 |
--------------------------------------------------------------------------------
/docs/html/structfound__string.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 | jstrings: found_string Struct Reference
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 | jstrings
25 | 1
26 |
27 | A tool for finding JIS-based character strings in binary streams
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
38 |
39 |
40 |
46 |
47 |
48 |
52 |
53 |
54 |
55 |
56 |
59 |
60 |
61 |
62 |
69 |
70 |
71 |
POD structure for containing a found string.
72 | More...
73 |
74 |
#include <jis_enc.h >
75 |
76 |
78 |
79 | off_t address
80 | The offset of the beginning of the found string relative to the start of the stream.
81 |
82 |
83 | std::vector< uint8_t > data
84 | The extracted string data.
85 |
86 |
87 |
88 |
POD structure for containing a found string.
89 |
Author Damian Rogers (damia.nosp@m. n@su.nosp@m. dden-.nosp@m. desu.nosp@m. .net )
90 |
Version 1.0
91 |
Date 2017.12.30
92 |
Copyright GNU Public License
93 |
The documentation for this struct was generated from the following file:
96 |
97 |
98 |
103 |
104 |
105 |
--------------------------------------------------------------------------------
/docs/html/sync_off.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/drojaazu/jstrings/bdeef30593790062d99d8b64d0b80e7b69f0b08b/docs/html/sync_off.png
--------------------------------------------------------------------------------
/docs/html/sync_on.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/drojaazu/jstrings/bdeef30593790062d99d8b64d0b80e7b69f0b08b/docs/html/sync_on.png
--------------------------------------------------------------------------------
/docs/html/tab_a.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/drojaazu/jstrings/bdeef30593790062d99d8b64d0b80e7b69f0b08b/docs/html/tab_a.png
--------------------------------------------------------------------------------
/docs/html/tab_b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/drojaazu/jstrings/bdeef30593790062d99d8b64d0b80e7b69f0b08b/docs/html/tab_b.png
--------------------------------------------------------------------------------
/docs/html/tab_h.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/drojaazu/jstrings/bdeef30593790062d99d8b64d0b80e7b69f0b08b/docs/html/tab_h.png
--------------------------------------------------------------------------------
/docs/html/tab_s.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/drojaazu/jstrings/bdeef30593790062d99d8b64d0b80e7b69f0b08b/docs/html/tab_s.png
--------------------------------------------------------------------------------
/docs/html/tabs.css:
--------------------------------------------------------------------------------
1 | .sm{position:relative;z-index:9999}.sm,.sm ul,.sm li{display:block;list-style:none;margin:0;padding:0;line-height:normal;direction:ltr;text-align:left;-webkit-tap-highlight-color:rgba(0,0,0,0)}.sm-rtl,.sm-rtl ul,.sm-rtl li{direction:rtl;text-align:right}.sm>li>h1,.sm>li>h2,.sm>li>h3,.sm>li>h4,.sm>li>h5,.sm>li>h6{margin:0;padding:0}.sm ul{display:none}.sm li,.sm a{position:relative}.sm a{display:block}.sm a.disabled{cursor:not-allowed}.sm:after{content:"\00a0";display:block;height:0;font:0/0 serif;clear:both;visibility:hidden;overflow:hidden}.sm,.sm *,.sm *:before,.sm *:after{-moz-box-sizing:border-box;-webkit-box-sizing:border-box;box-sizing:border-box}#doc-content{overflow:auto;display:block;padding:0;margin:0;-webkit-overflow-scrolling:touch}.sm-dox{background-image:url("tab_b.png")}.sm-dox a,.sm-dox a:focus,.sm-dox a:hover,.sm-dox a:active{padding:0 12px;padding-right:43px;font-family:"Lucida Grande","Geneva","Helvetica",Arial,sans-serif;font-size:13px;font-weight:bold;line-height:36px;text-decoration:none;text-shadow:0 1px 1px rgba(255,255,255,0.9);color:#283a5d;outline:0}.sm-dox a:hover{background-image:url("tab_a.png");background-repeat:repeat-x;color:white;text-shadow:0 1px 1px black}.sm-dox a.current{color:#d23600}.sm-dox a.disabled{color:#bbb}.sm-dox a span.sub-arrow{position:absolute;top:50%;margin-top:-14px;left:auto;right:3px;width:28px;height:28px;overflow:hidden;font:bold 12px/28px monospace!important;text-align:center;text-shadow:none;background:rgba(255,255,255,0.5);-moz-border-radius:5px;-webkit-border-radius:5px;border-radius:5px}.sm-dox a.highlighted span.sub-arrow:before{display:block;content:'-'}.sm-dox>li:first-child>a,.sm-dox>li:first-child>:not(ul) a{-moz-border-radius:5px 5px 0 0;-webkit-border-radius:5px;border-radius:5px 5px 0 0}.sm-dox>li:last-child>a,.sm-dox>li:last-child>*:not(ul) a,.sm-dox>li:last-child>ul,.sm-dox>li:last-child>ul>li:last-child>a,.sm-dox>li:last-child>ul>li:last-child>*:not(ul) 
a,.sm-dox>li:last-child>ul>li:last-child>ul,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>a,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>*:not(ul) a,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>a,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>*:not(ul) a,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>ul,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>a,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>*:not(ul) a,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>ul{-moz-border-radius:0 0 5px 5px;-webkit-border-radius:0;border-radius:0 0 5px 5px}.sm-dox>li:last-child>a.highlighted,.sm-dox>li:last-child>*:not(ul) a.highlighted,.sm-dox>li:last-child>ul>li:last-child>a.highlighted,.sm-dox>li:last-child>ul>li:last-child>*:not(ul) a.highlighted,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>a.highlighted,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>*:not(ul) a.highlighted,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>a.highlighted,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>*:not(ul) a.highlighted,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>a.highlighted,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>*:not(ul) a.highlighted{-moz-border-radius:0;-webkit-border-radius:0;border-radius:0}.sm-dox ul{background:rgba(162,162,162,0.1)}.sm-dox ul a,.sm-dox ul a:focus,.sm-dox ul a:hover,.sm-dox ul a:active{font-size:12px;border-left:8px solid transparent;line-height:36px;text-shadow:none;background-color:white;background-image:none}.sm-dox ul a:hover{background-image:url("tab_a.png");background-repeat:repeat-x;color:white;text-shadow:0 1px 
1px black}.sm-dox ul ul a,.sm-dox ul ul a:hover,.sm-dox ul ul a:focus,.sm-dox ul ul a:active{border-left:16px solid transparent}.sm-dox ul ul ul a,.sm-dox ul ul ul a:hover,.sm-dox ul ul ul a:focus,.sm-dox ul ul ul a:active{border-left:24px solid transparent}.sm-dox ul ul ul ul a,.sm-dox ul ul ul ul a:hover,.sm-dox ul ul ul ul a:focus,.sm-dox ul ul ul ul a:active{border-left:32px solid transparent}.sm-dox ul ul ul ul ul a,.sm-dox ul ul ul ul ul a:hover,.sm-dox ul ul ul ul ul a:focus,.sm-dox ul ul ul ul ul a:active{border-left:40px solid transparent}@media(min-width:768px){.sm-dox ul{position:absolute;width:12em}.sm-dox li{float:left}.sm-dox.sm-rtl li{float:right}.sm-dox ul li,.sm-dox.sm-rtl ul li,.sm-dox.sm-vertical li{float:none}.sm-dox a{white-space:nowrap}.sm-dox ul a,.sm-dox.sm-vertical a{white-space:normal}.sm-dox .sm-nowrap>li>a,.sm-dox .sm-nowrap>li>:not(ul) a{white-space:nowrap}.sm-dox{padding:0 10px;background-image:url("tab_b.png");line-height:36px}.sm-dox a span.sub-arrow{top:50%;margin-top:-2px;right:12px;width:0;height:0;border-width:4px;border-style:solid dashed dashed dashed;border-color:#283a5d transparent transparent transparent;background:transparent;-moz-border-radius:0;-webkit-border-radius:0;border-radius:0}.sm-dox a,.sm-dox a:focus,.sm-dox a:active,.sm-dox a:hover,.sm-dox a.highlighted{padding:0 12px;background-image:url("tab_s.png");background-repeat:no-repeat;background-position:right;-moz-border-radius:0!important;-webkit-border-radius:0;border-radius:0!important}.sm-dox a:hover{background-image:url("tab_a.png");background-repeat:repeat-x;color:white;text-shadow:0 1px 1px black}.sm-dox a:hover span.sub-arrow{border-color:white transparent transparent transparent}.sm-dox a.has-submenu{padding-right:24px}.sm-dox li{border-top:0}.sm-dox>li>ul:before,.sm-dox>li>ul:after{content:'';position:absolute;top:-18px;left:30px;width:0;height:0;overflow:hidden;border-width:9px;border-style:dashed dashed solid dashed;border-color:transparent transparent 
#bbb transparent}.sm-dox>li>ul:after{top:-16px;left:31px;border-width:8px;border-color:transparent transparent #fff transparent}.sm-dox ul{border:1px solid #bbb;padding:5px 0;background:#fff;-moz-border-radius:5px!important;-webkit-border-radius:5px;border-radius:5px!important;-moz-box-shadow:0 5px 9px rgba(0,0,0,0.2);-webkit-box-shadow:0 5px 9px rgba(0,0,0,0.2);box-shadow:0 5px 9px rgba(0,0,0,0.2)}.sm-dox ul a span.sub-arrow{right:8px;top:50%;margin-top:-5px;border-width:5px;border-color:transparent transparent transparent #555;border-style:dashed dashed dashed solid}.sm-dox ul a,.sm-dox ul a:hover,.sm-dox ul a:focus,.sm-dox ul a:active,.sm-dox ul a.highlighted{color:#555;background-image:none;border:0!important;color:#555;background-image:none}.sm-dox ul a:hover{background-image:url("tab_a.png");background-repeat:repeat-x;color:white;text-shadow:0 1px 1px black}.sm-dox ul a:hover span.sub-arrow{border-color:transparent transparent transparent white}.sm-dox span.scroll-up,.sm-dox span.scroll-down{position:absolute;display:none;visibility:hidden;overflow:hidden;background:#fff;height:36px}.sm-dox span.scroll-up:hover,.sm-dox span.scroll-down:hover{background:#eee}.sm-dox span.scroll-up:hover span.scroll-up-arrow,.sm-dox span.scroll-up:hover span.scroll-down-arrow{border-color:transparent transparent #d23600 transparent}.sm-dox span.scroll-down:hover span.scroll-down-arrow{border-color:#d23600 transparent transparent transparent}.sm-dox span.scroll-up-arrow,.sm-dox span.scroll-down-arrow{position:absolute;top:0;left:50%;margin-left:-6px;width:0;height:0;overflow:hidden;border-width:6px;border-style:dashed dashed solid dashed;border-color:transparent transparent #555 transparent}.sm-dox span.scroll-down-arrow{top:8px;border-style:solid dashed dashed dashed;border-color:#555 transparent transparent transparent}.sm-dox.sm-rtl a.has-submenu{padding-right:12px;padding-left:24px}.sm-dox.sm-rtl a span.sub-arrow{right:auto;left:12px}.sm-dox.sm-rtl.sm-vertical 
a.has-submenu{padding:10px 20px}.sm-dox.sm-rtl.sm-vertical a span.sub-arrow{right:auto;left:8px;border-style:dashed solid dashed dashed;border-color:transparent #555 transparent transparent}.sm-dox.sm-rtl>li>ul:before{left:auto;right:30px}.sm-dox.sm-rtl>li>ul:after{left:auto;right:31px}.sm-dox.sm-rtl ul a.has-submenu{padding:10px 20px!important}.sm-dox.sm-rtl ul a span.sub-arrow{right:auto;left:8px;border-style:dashed solid dashed dashed;border-color:transparent #555 transparent transparent}.sm-dox.sm-vertical{padding:10px 0;-moz-border-radius:5px;-webkit-border-radius:5px;border-radius:5px}.sm-dox.sm-vertical a{padding:10px 20px}.sm-dox.sm-vertical a:hover,.sm-dox.sm-vertical a:focus,.sm-dox.sm-vertical a:active,.sm-dox.sm-vertical a.highlighted{background:#fff}.sm-dox.sm-vertical a.disabled{background-image:url("tab_b.png")}.sm-dox.sm-vertical a span.sub-arrow{right:8px;top:50%;margin-top:-5px;border-width:5px;border-style:dashed dashed dashed solid;border-color:transparent transparent transparent #555}.sm-dox.sm-vertical>li>ul:before,.sm-dox.sm-vertical>li>ul:after{display:none}.sm-dox.sm-vertical ul a{padding:10px 20px}.sm-dox.sm-vertical ul a:hover,.sm-dox.sm-vertical ul a:focus,.sm-dox.sm-vertical ul a:active,.sm-dox.sm-vertical ul a.highlighted{background:#eee}.sm-dox.sm-vertical ul a.disabled{background:#fff}}
--------------------------------------------------------------------------------
/inc/enc_cp932.hpp:
--------------------------------------------------------------------------------
1 | #ifndef ENC_CP932_HPP
2 | #define ENC_CP932_HPP
3 |
4 | #include "enc_shiftjis.hpp"
5 |
6 | namespace motoi
7 | {
8 |
9 | /**
10 | * @brief Validator for MS Code Page 932 byte sequences, derived from
11 | * shiftjis_validator.
12 | *
13 | * @details is_valid() is declared to return uint (not size_t): 0 if not a
14 | * valid sequence; otherwise the number of bytes in the code point sequence.
15 | */
16 | class cp932_validator : public shiftjis_validator
17 | {
18 | public:
19 | cp932_validator()
20 | {
21 | m_iconv_code = "CP932"; // replaces the iconv code assigned by the base class constructor
22 | }
23 | // Checks the bytes at data against CP932; the definition is in enc_cp932.cpp
24 | uint is_valid(byte_t const * data) const override;
25 | ~cp932_validator() override = default;
26 | };
27 |
28 | } // namespace motoi
29 |
30 | #endif
--------------------------------------------------------------------------------
/inc/enc_eucjp.hpp:
--------------------------------------------------------------------------------
1 | #ifndef ENC_EUCJP_HPP
2 | #define ENC_EUCJP_HPP
3 |
4 | #include "jis_validator.hpp"
5 |
6 | namespace motoi
7 | {
8 |
9 | /**
10 | * @brief Validator for EUC-JP byte sequences.
11 | *
12 | * @details Passes a maximum sequence length of 3 and the iconv code "EUC-JP"
13 | * to jis_validator. is_valid() returns a uint: 0 if not a valid sequence,
14 | * otherwise the number of bytes in the code point sequence.
15 | */
16 | class eucjp_validator : public jis_validator
17 | {
18 | private:
19 |     constexpr static char const * m_iconvcode = "EUC-JP";
20 |
21 | public:
22 |     eucjp_validator() : jis_validator(3, m_iconvcode) {}
23 |
24 |     uint is_valid(byte_t const * data) const override;
25 |     ~eucjp_validator() override = default;
26 | };
27 |
28 | } // namespace motoi
29 | #endif
30 |
--------------------------------------------------------------------------------
/inc/enc_shiftjis.hpp:
--------------------------------------------------------------------------------
1 | #ifndef ENC_SHIFTJIS_HPP
2 | #define ENC_SHIFTJIS_HPP
3 | #include "jis_validator.hpp"
4 |
5 | namespace motoi
6 | {
7 |
8 | /**
9 | * @brief Validator for Shift-JIS byte sequences.
10 | *
11 | * @details Passes a maximum sequence length of 2 and the iconv code
12 | * "SHIFT-JIS" to jis_validator. is_valid() returns a uint: 0 if not a valid
13 | * sequence, otherwise the number of bytes in the code point sequence.
14 | * This supports traditional Shift-JIS, which encompasses JIS X 0201 and
15 | * JIS X 0208 character sets. It does not currently support the JIS X 0213
16 | * extension set.
17 | */
18 | class shiftjis_validator : public jis_validator
19 | {
20 | private:
21 |     constexpr static char const * m_iconvcode = "SHIFT-JIS";
22 |
23 | public:
24 |     // Longest sequence is 2 bytes; the iconv name comes from m_iconvcode.
25 |     shiftjis_validator() : jis_validator(2, m_iconvcode) {}
26 |
27 |     uint is_valid(byte_t const * data) const override;
28 |     ~shiftjis_validator() override = default;
29 | };
30 |
31 | } // namespace motoi
32 | #endif
--------------------------------------------------------------------------------
/inc/enc_validator.hpp:
--------------------------------------------------------------------------------
1 | #ifndef ENCODING_HPP
2 | #define ENCODING_HPP
3 |
4 | #include "types.hpp"
5 |
6 | namespace motoi
7 | {
8 | /*!
9 | * \brief Abstract base for encoding validator classes
10 | */
11 | class encoding_validator
12 | {
13 | protected:
14 |     uint m_max_seq_len; //!< value reported by max_seq_len()
15 |
16 |     //! Stores the maximum sequence length; explicit so a bare uint never converts implicitly.
17 |     explicit encoding_validator(uint max_seq_len) : m_max_seq_len {max_seq_len} {}
18 |
19 | public:
20 |     encoding_validator() = delete;
21 |     virtual ~encoding_validator() = default;
22 |
23 |     /*!
24 |      * \brief Determines if the given bytes are a valid byte sequence for the encoding.
25 |      * Returns the number of valid bytes if true.
26 |      */
27 |     virtual uint is_valid(byte_t const * data) const = 0;
28 |
29 |     //! Function-call shorthand that forwards to is_valid().
30 |     uint operator()(byte_t const * data) const
31 |     {
32 |         return is_valid(data);
33 |     }
34 |
35 |     //! Returns the maximum sequence length given at construction.
36 |     [[nodiscard]] uint max_seq_len() const
37 |     {
38 |         return m_max_seq_len;
39 |     }
40 | };
41 | } // namespace motoi
42 |
43 | #endif
44 |
--------------------------------------------------------------------------------
/inc/filesys.hpp:
--------------------------------------------------------------------------------
1 | /**
2 | * @file filesys.hpp
3 | * @author Motoi Productions (Damian Rogers damian@motoi.pro)
4 | * @brief File system/path utilities
5 | *
6 | * Updates:
7 | * 20211214 Initial
8 | * 20220420 Converted to basic_string, added path parsing functions
9 | * 20220720 Change stringstream to ostringstream; added file_size & concat_paths
10 | */
11 |
12 | #ifndef __MOTOI__FILESYS_HPP
13 | #define __MOTOI__FILESYS_HPP
14 |
15 | #include
16 | #include
17 | #include
18 | #include
19 | // Joins the given path fragments in order, writing '/' between them where needed.
20 | template
21 | std::basic_string concat_paths(StringT... paths)
22 | {
23 |     size_t path_count = sizeof...(paths);
24 |     size_t u = 0; // 1-based position of the fragment being visited
25 |     std::ostringstream oss; // qualified: no using-directive for std is visible in this header
26 |     for(auto const & s : { paths... })
27 |     {
28 |         ++u;
29 |         if(s.empty())
30 |             continue; // empty fragments add nothing but still advance u
31 |         oss << s;
32 |         if(s.back() != '/' && u < path_count) // no '/' after the last fragment or after an existing one
33 |             oss << '/';
34 |     }
35 |
36 |     return oss.str();
37 | }
38 | // Returns the ::stat() record for path; throws std::runtime_error when ::stat() fails.
39 | template
40 | struct stat stat(std::basic_string const & path)
41 | {
42 |     struct stat status {}; // local rather than function-static: the record is returned by value
43 |     if(::stat(path.c_str(), &status) != 0) // ::stat() takes a C string, not a basic_string
44 |     {
45 |         std::basic_ostringstream ss;
46 |         ss << "Could not open path " << path << ": " << strerror(errno);
47 |         throw std::runtime_error(ss.str());
48 |     }
49 |     return status;
50 | }
51 | // Reports whether ::stat() succeeds for path; the stat record itself is discarded.
52 | template
53 | bool exists(std::basic_string const & path)
54 | {
55 | static struct stat status; // scratch output required by ::stat(); never read afterwards
56 | return (::stat(path.c_str(), &status) == 0);
57 | }
58 | // Returns st_size from ::stat() for path; throws std::runtime_error if the path is missing or ::stat() fails.
59 | template
60 | size_t file_size(std::basic_string const & path)
61 | {
62 |     if(!exists(path))
63 |     {
64 |         std::basic_ostringstream oss;
65 |         oss << "Path " << path << " does not exist";
66 |         throw std::runtime_error(oss.str());
67 |     }
68 |
69 |     struct stat status {}; // local rather than function-static: only st_size leaves this function
70 |     if(::stat(path.c_str(), &status) != 0)
71 |     {
72 |         std::basic_ostringstream oss;
73 |         oss << "Could not open path " << path << ": " << strerror(errno);
74 |         throw std::runtime_error(oss.str());
75 |     }
76 |     return status.st_size;
77 | }
78 | // Opens path for input and returns the stream; throws std::runtime_error if the stream is not good().
79 | template
80 | std::ifstream
81 | ifstream_checked(std::basic_string const & path,
82 | std::basic_string const & purpose = L"read") // NOTE(review): wide literal default, while fstreams.hpp uses "read" - confirm the intended character type
83 | {
84 | std::ifstream ifs(path);
85 | if(!ifs.good())
86 | {
87 | std::basic_ostringstream oss;
88 | oss << "Could not open input path \"" << path << "\" for " << purpose // purpose only labels the error text
89 | << ": " << strerror(errno);
90 | throw std::runtime_error(oss.str());
91 | }
92 | return ifs;
93 | }
94 | // Opens path for output and returns the stream; throws std::runtime_error if the stream is not good().
95 | template
96 | std::ofstream
97 | ofstream_checked(std::basic_string const & path,
98 |     std::basic_string const & purpose = L"write")
99 | {
100 |     std::ofstream ofs(path);
101 |     if(!ofs.good())
102 |     {
103 |         std::basic_ostringstream oss;
104 |         oss << "Could not open output path \"" << path << "\" for " << purpose // purpose only labels the error text
105 |             << ": " << strerror(errno);
106 |         throw std::runtime_error(oss.str()); // qualified, matching ifstream_checked in this header
107 |     }
108 |     return ofs;
109 | }
110 | // Returns path with everything from its last '.' onward removed; path is returned unchanged if it has no '.'.
111 | template
112 | std::basic_string
113 | strip_extension(std::basic_string const & path)
114 | {
115 | auto i_at { path.find_last_of('.') }; // searches the whole path, directory components included
116 | if(i_at == std::string::npos)
117 | return path;
118 | return path.substr(0, i_at);
119 | }
120 | // Returns the part of path after its last '/'; path is returned unchanged if it has no '/'.
121 | template
122 | std::basic_string
123 | filename_from_path(std::basic_string const & path)
124 | {
125 |     auto i_at { path.find_last_of('/') };
126 |     if(i_at == std::string::npos)
127 |         return path;
128 |     return path.substr(i_at + 1); // skip past the separator; substr(0, i_at) would yield the directory part
129 | }
130 |
131 | #endif
132 |
--------------------------------------------------------------------------------
/inc/fstreams.hpp:
--------------------------------------------------------------------------------
1 | /**
2 | * @file fstreams.hpp
3 | * @author Damian Rogers (damian@motoi.pro)
4 | * @copyright Motoi Productions / Released under MIT License
5 | * @brief Checked file streams
6 | *
7 | * Updates:
8 | * 20220916 Initial
9 | * 20221118 Changed exception to std::system_error
10 | */
11 |
12 | #ifndef __MOTOI__FSTREAMS_HPP
13 | #define __MOTOI__FSTREAMS_HPP
14 |
15 | #include
16 | #include
17 | #include
18 | #include
19 |
20 | namespace motoi
21 | {
22 | // Opens path for input; throws std::system_error built from errno if the stream is not good().
23 | template
24 | std::ifstream ifstream_checked(std::basic_string const & path, std::basic_string const & purpose = "read")
25 | {
26 |     std::ifstream ifs(path);
27 |     if (! ifs.good())
28 |     {
29 |         int const open_errno {errno}; // capture before stream formatting can overwrite errno
30 |         std::basic_ostringstream oss;
31 |         oss << "Could not open input path \"" << path << "\" for " << purpose; // purpose only labels the text
32 |         throw std::system_error(open_errno, std::system_category(), oss.str());
33 |     }
34 |     return ifs;
35 | }
36 | // Opens path for output; throws std::system_error built from errno if the stream is not good().
37 | template
38 | std::ofstream ofstream_checked(std::basic_string const & path, std::basic_string const & purpose = "write")
39 | {
40 |     std::ofstream ofs(path);
41 |     if (! ofs.good())
42 |     {
43 |         int const open_errno {errno}; // capture before stream formatting can overwrite errno
44 |         std::basic_ostringstream oss;
45 |         oss << "Could not open output path \"" << path << "\" for " << purpose; // no strerror(): system_error adds the code's message
46 |         throw std::system_error(open_errno, std::system_category(), oss.str());
47 |     }
48 |     return ofs;
49 | }
50 | } // namespace motoi
51 |
52 | #endif
53 |
--------------------------------------------------------------------------------
/inc/iconv_encoding.hpp:
--------------------------------------------------------------------------------
1 | #ifndef __MOTOI__ICONV_ENCODING_HPP
2 | #define __MOTOI__ICONV_ENCODING_HPP
3 |
4 | namespace motoi
5 | {
6 |
7 | /**
8 | * @brief Base class that carries an iconv encoding code string
9 | *
10 | */
11 | class iconv_encoding
12 | {
13 | protected:
14 |     char const * m_iconv_code; // pointer is stored as given; the string is not copied
15 |
16 |     // explicit: a bare C string should not convert to an iconv_encoding implicitly
17 |     explicit iconv_encoding(char const * iconv_code) : m_iconv_code {iconv_code}
18 |     {
19 |     }
20 |
21 | public:
22 |     iconv_encoding() = delete;
23 |     virtual ~iconv_encoding() = default;
24 |
25 |     [[nodiscard]] char const * iconv_code() const // returns the stored pointer
26 |     {
27 |         return m_iconv_code;
28 |     }
29 | };
30 |
31 | } // namespace motoi
32 | #endif
33 |
--------------------------------------------------------------------------------
/inc/jis_validator.hpp:
--------------------------------------------------------------------------------
1 | #ifndef __MOTOI__JIS_VALIDATOR_HPP
2 | #define __MOTOI__JIS_VALIDATOR_HPP
3 |
4 | #include "enc_validator.hpp"
5 | #include "iconv_encoding.hpp"
6 |
7 | namespace motoi
8 | {
9 | /**
10 | * @brief Common base for validators of JIS character set encodings
11 | */
12 | class jis_validator : public encoding_validator, public iconv_encoding
13 | {
14 | protected:
15 |     bool m_include_crlf = false; // option flag, read and written by include_crlf()
16 |     bool m_skip_jis0201 = false; // option flag, read and written by skip_jis0201()
17 |
18 |     // Hands each argument to the matching base class constructor (max_seq_len becomes uint there).
19 |     jis_validator(size_t max_seq_len, char const * iconv_code) :
20 |         encoding_validator(max_seq_len), iconv_encoding(iconv_code)
21 |     {
22 |     }
23 |
24 | public:
25 |     jis_validator() = delete;
26 |
27 |     [[nodiscard]] bool include_crlf() const
28 |     {
29 |         return this->m_include_crlf;
30 |     }
31 |
32 |     [[nodiscard]] bool skip_jis0201() const
33 |     {
34 |         return this->m_skip_jis0201;
35 |     }
36 |
37 |     void include_crlf(bool include_crlf)
38 |     {
39 |         this->m_include_crlf = include_crlf;
40 |     }
41 |
42 |     void skip_jis0201(bool skip_jis0201)
43 |     {
44 |         this->m_skip_jis0201 = skip_jis0201;
45 |     }
46 | };
47 |
48 | } // namespace motoi
49 |
50 | #endif
51 |
--------------------------------------------------------------------------------
/inc/jstrings.hpp:
--------------------------------------------------------------------------------
1 | #ifndef __MOTOI__JSTRINGS_HPP
2 | #define __MOTOI__JSTRINGS_HPP
3 |
4 | #include "enc_validator.hpp"
5 | #include "types.hpp"
6 | #include
7 | #include
8 | #include
9 |
10 | namespace motoi
11 | {
12 |
13 | constexpr static size_t const default_match_length {5};
14 |
15 | // buffer size is 4mb
16 | constexpr static size_t const buffer_size {0x100000 * 4};
17 |
18 | /**
19 | * @brief Contains an array of data identified as a valid
20 | * string of text for a certain encoding. First value is
21 | * the offset in the source data in which the string was
22 | * found; second value is the vector of data.
23 | */
24 | using found_string = std::pair>;
25 |
26 | /**
27 | * @brief Finds strings matching a given encoding within binary data
28 | */
29 | std::vector find(
30 | std::istream & input, encoding_validator const & validator, size_t const minimum_match_length = default_match_length);
31 |
32 | } // namespace motoi
33 |
34 | #endif
35 |
--------------------------------------------------------------------------------
/inc/types.hpp:
--------------------------------------------------------------------------------
1 | /**
2 | * @file types.hpp
3 | * @author Damian Rogers (damian@motoi.pro)
4 | * @copyright Motoi Productions / Released under MIT License
5 | * @brief Commonly used typedefs
6 | *
7 | * Updates:
8 | * 20200101 Initial
9 | */
10 |
11 | #ifndef __MOTOI__TYPES_HPP
12 | #define __MOTOI__TYPES_HPP
13 |
14 | //#include
15 | #include
16 | #include
17 |
18 | using uchar = unsigned char;
19 | using uint = unsigned int;
20 | using ushort = unsigned short;
21 | using ulong = unsigned long;
22 |
23 | using u8 = uint8_t;
24 | using u16 = uint16_t;
25 | using u32 = uint32_t;
26 |
27 | using s8 = int8_t;
28 | using s16 = int16_t;
29 | using s32 = int32_t;
30 |
31 | using byte_t = uint8_t;
32 |
33 | template
34 | using uptr = std::unique_ptr;
35 | template
36 | using sptr = std::shared_ptr;
37 |
38 | #endif
39 |
--------------------------------------------------------------------------------
/inc/uniconv.hpp:
--------------------------------------------------------------------------------
1 | #ifndef ENC_CP932_HPP
2 | #define ENC_CP932_HPP
3 |
4 | #include "enc_shiftjis.hpp"
5 |
6 | namespace motoi
7 | {
8 |
9 | /**
10 | * @brief Validator for MS Code Page 932 byte sequences, derived from
11 | * shiftjis_validator.
12 | *
13 | * NOTE(review): this declaration and the ENC_CP932_HPP guard around it repeat
14 | * enc_cp932.hpp line for line; it looks like a stray paste - confirm and remove.
15 | */
16 | class cp932_validator : public shiftjis_validator
17 | {
18 | public:
19 | cp932_validator()
20 | {
21 | m_iconv_code = "CP932";
22 | }
23 |
24 | uint is_valid(byte_t const * data) const override;
25 | ~cp932_validator() override = default;
26 | };
27 |
28 | } // namespace motoi
29 |
30 | #endif /**
31 | * @file uniconv.hpp
32 | * @author Damian Rogers (damian@motoi.pro)
33 | * @copyright Motoi Productions / Released under MIT License
34 | * @brief Wrapper for iconv to convert to utf8
35 | *
36 | * Updates:
37 | * 20221120 Initial
38 | */
39 |
40 | #ifndef __MOTOI__UNICONV_HPP
41 | #define __MOTOI__UNICONV_HPP
42 |
43 | #include
44 | #include
45 | #include
46 | #include
47 | #include
48 | #include
49 |
50 | namespace motoi
51 | {
52 | class uniconv
53 | {
54 | private:
55 |     constexpr static char const * m_convcode = "UTF-8"; // target encoding of every conversion
56 |     iconv_t m_conv; // descriptor from iconv_open(); (iconv_t) -1 once moved from
57 |
58 | public:
59 |     uniconv(uniconv const &) = delete;
60 |     uniconv & operator=(uniconv const &) = delete;
61 |
62 |     // Moves take over the descriptor and leave the source without one, so that
63 |     // only one object ever passes it to iconv_close().
64 |     uniconv(uniconv && other) noexcept : m_conv {other.m_conv} { other.m_conv = (iconv_t) -1; }
65 |
66 |     uniconv & operator=(uniconv && other) noexcept
67 |     {
68 |         if (this != &other)
69 |         {
70 |             if (m_conv != (iconv_t) -1)
71 |                 iconv_close(m_conv);
72 |             m_conv = other.m_conv;
73 |             other.m_conv = (iconv_t) -1;
74 |         }
75 |         return *this;
76 |     }
77 |
78 |     // Opens a converter from fromcode to UTF-8; throws std::system_error if iconv_open() fails.
79 |     uniconv(const char * fromcode)
80 |     {
81 |         m_conv = iconv_open(uniconv::m_convcode, fromcode);
82 |         if (m_conv == (iconv_t) -1)
83 |         {
84 |             std::ostringstream oss;
85 |             oss << "Could not open converter using encoding " << fromcode;
86 |             throw std::system_error(errno, std::generic_category(), oss.str());
87 |         }
88 |     }
89 |
90 |     ~uniconv()
91 |     {
92 |         if (m_conv != (iconv_t) -1) // a moved-from object holds no descriptor to close
93 |             iconv_close(m_conv);
94 |     }
95 |
96 |     // Converts in_size bytes at in to UTF-8 and returns the result. Throws
97 |     // std::system_error when iconv() reports any error other than E2BIG.
98 |     std::string convert(char * in, size_t in_size)
99 |     {
100 |         std::string out(in_size * 2, '\0');
101 |         char * p_in = in;
102 |         char * p_out = (char *) out.data();
103 |         size_t out_size = out.size();
104 |
105 |         while (iconv(m_conv, &p_in, &in_size, &p_out, &out_size) == (size_t) -1)
106 |         {
107 |             if (errno == E2BIG)
108 |             {
109 |                 // output buffer out of space: double it and resume where iconv() stopped
110 |                 ptrdiff_t pos = p_out - out.data();
111 |                 out.resize(out.size() * 2);
112 |                 p_out = (char *) out.data() + pos;
113 |                 out_size = out.size() - pos;
114 |                 continue;
115 |             }
116 |             // any other error cannot be retried; throw instead of looping forever
117 |             int const conv_errno {errno}; // capture before stream formatting can overwrite errno
118 |             std::ostringstream oss;
119 |             if (conv_errno == EILSEQ || conv_errno == EINVAL)
120 |                 oss << "Encountered invalid byte sequence at position " << (p_in - in);
121 |             else
122 |                 oss << "Conversion failed at position " << (p_in - in);
123 |             throw std::system_error(conv_errno, std::generic_category(), oss.str());
124 |         }
125 |
126 |         out.resize(p_out - out.data()); // trim to the bytes iconv() actually wrote
127 |         return out;
128 |     }
129 |
130 |     // Convenience overloads: each passes the container's buffer and size to convert(char *, size_t).
131 |     std::string convert(std::vector & in) { return convert(in.data(), in.size()); }
132 |     std::string convert(std::vector & in) { return convert((char *) in.data(), in.size()); }
133 |     std::string convert(std::string & in) { return convert(in.data(), in.size()); }
134 | };
135 | } // namespace motoi
136 |
137 | #endif
138 |
--------------------------------------------------------------------------------
/inc/usage.hpp:
--------------------------------------------------------------------------------
1 | /**
2 | * @file usage.hpp
3 | * @author Motoi Productions (Damian Rogers damian@motoi.pro)
4 | * @brief Display program options/usage
5 | *
6 | * Updates:
7 | * 20220415 Initial
8 | * 20220722 Using const in show_usage parameters
9 | * 20220914 Added show_version
10 | * 20221118 Added copyright field; change to non-wide characters to properly support
11 | * UTF-8; updates to match changes to match app.hpp.cfg
12 | */
13 |
14 | #ifndef __MOTOI__USAGE_HPP
15 | #define __MOTOI__USAGE_HPP
16 |
17 | #include
18 | #include
19 | #include
20 | #include
21 |
22 | namespace motoi
23 | {
24 | struct option_details // per-option display data; show_usage() takes an array of these alongside its option array
25 | {
26 | bool const required; // presumably marks the option as mandatory - confirm in show_usage()
27 | char const * desc; // presumably the description text shown for the option - confirm in show_usage()
28 | char const * arg_type; // presumably a label for the option's argument - confirm in show_usage()
29 | };
30 |
31 | /**
32 | * @brief Displays program version
33 | *
34 | * @param output stream to write the version
35 | */
36 | void show_version(std::ostream & output);
37 |
38 | /**
39 | * @brief Displays program options and usage
40 | *
41 | * @param opts array of @c option structs; final entry should be all zero
42 | * @param details array of @c option_details structs; must be the same size and
43 | * order as @c opts
44 | * @param output stream to write the usage
45 | */
46 | void show_usage(option const * opts, option_details const * details, std::ostream & output = std::cout);
47 |
48 | } // namespace motoi
49 |
50 | #endif
51 |
--------------------------------------------------------------------------------
/src/app.hpp:
--------------------------------------------------------------------------------
1 | /**
2 | * @author Damian R (damian@motoi.pro)
3 | * @brief A tool for finding JIS-based Japanese text in binary data.
4 | * @version 1.6
5 | *
6 | * @copyright ©2018 Motoi Productions / Released under MIT License
7 | *
8 | */
9 |
10 | #ifndef __MOTOI__APP_HPP
11 | #define __MOTOI__APP_HPP
12 |
13 | /*
14 | These values should be set within CMakeLists.txt
15 | */
16 | namespace app
17 | {
18 | static unsigned int const version_major {1};
19 | static unsigned int const version_minor {6};
20 | static unsigned int const version_patch {};
21 | static char const * version {"1.6"};
22 |
23 | static char const * name {"jstrings"};
24 | static char const * brief {"A tool for finding JIS-based Japanese text in binary data."};
25 | static char const * copyright {"©2018 Motoi Productions / Released under MIT License"};
26 | static char const * contact {"Damian R (damian@motoi.pro)"};
27 | static char const * website {"https://github.com/drojaazu"};
28 | } // namespace app
29 |
30 | #endif
31 |
--------------------------------------------------------------------------------
/src/app.hpp.cfg:
--------------------------------------------------------------------------------
1 | /**
2 | * @author @PROJECT_CONTACT@
3 | * @brief @PROJECT_BRIEF@
4 | * @version @PROJECT_VERSION@
5 | *
6 | * @copyright @PROJECT_COPYRIGHT@
7 | *
8 | */
9 |
10 | #ifndef __MOTOI__APP_HPP
11 | #define __MOTOI__APP_HPP
12 |
13 | /*
14 | These values should be set within CMakeLists.txt
15 | */
16 | namespace app
17 | {
18 | static unsigned int const version_major {@PROJECT_VERSION_MAJOR@};
19 | static unsigned int const version_minor {@PROJECT_VERSION_MINOR@};
20 | static unsigned int const version_patch {@PROJECT_VERSION_PATCH@};
21 | static char const * version {"@PROJECT_VERSION@"};
22 |
23 | static char const * name {"@PROJECT_NAME@"};
24 | static char const * brief {"@PROJECT_BRIEF@"};
25 | static char const * copyright {"@PROJECT_COPYRIGHT@"};
26 | static char const * contact {"@PROJECT_CONTACT@"};
27 | static char const * website {"@PROJECT_WEBSITE@"};
28 | } // namespace app
29 |
30 | #endif
31 |
--------------------------------------------------------------------------------
/src/enc_cp932.cpp:
--------------------------------------------------------------------------------
1 | #include "enc_cp932.hpp"
2 |
3 | namespace motoi
4 | {
5 |
6 | /**
7 |  * Checks the bytes at data against CP932. The plain Shift-JIS check runs first;
8 |  * if it returns 0, data[1] is read and the two-byte extension rows are tried.
9 |  * Returns the sequence length in bytes, or 0 if nothing matched.
10 |  */
11 | uint cp932_validator::is_valid(byte_t const * data) const
12 | {
13 |     uint const valid_count = shiftjis_validator::is_valid(data); // uint, not u8: no truncation of the base result
14 |     if (valid_count > 0)
15 |         return valid_count;
16 |
17 |     u8 const c_hi {*data};
18 |     u8 const c_lo {*(data + 1)};
19 |
20 |     // ku 13 - lead byte 0x87 - NEC tokushu moji: 0x40 to 0x5d, 0x5f to 0x75, 0x7e, 0x80 to 0x8f, 0x93, 0x94, 0x98, 0x99
21 |     // ku 89-92 - lead byte 0xed, 0xee - NEC sentei IBM kakuchou moji: 0x40 to 0x7e, 0x80 to 0xfc
22 |     // ku 115-119 - lead byte 0xfa to 0xfc - IBM kakuchou moji: 0x40 to 0x7e, 0x80 to 0xfc (except 0xfc: 0x40 to 0x4b)
23 |     switch (c_hi)
24 |     {
25 |         case 0x87:
26 |             if ((c_lo >= 0x40 && c_lo <= 0x5d) || (c_lo >= 0x5f && c_lo <= 0x75) || c_lo == 0x7e ||
27 |                 (c_lo >= 0x80 && c_lo <= 0x8f) || c_lo == 0x93 || c_lo == 0x94 || c_lo == 0x98 || c_lo == 0x99)
28 |                 return 2;
29 |             break;
30 |         case 0xed:
31 |         case 0xee:
32 |         case 0xfa:
33 |         case 0xfb:
34 |             // 0x7f must be rejected here: this point is only reached when the base check returned 0
35 |             if (c_lo >= 0x40 && c_lo <= 0xfc && c_lo != 0x7f)
36 |                 return 2;
37 |             break; // do not fall through into the narrower 0xfc range test
38 |         case 0xfc:
39 |             if (c_lo >= 0x40 && c_lo <= 0x4b)
40 |                 return 2;
41 |             break;
42 |     }
43 |     return 0;
44 | }
45 | } // namespace motoi
46 |
--------------------------------------------------------------------------------
/src/enc_eucjp.cpp:
--------------------------------------------------------------------------------
1 | #include "enc_eucjp.hpp"
2 |
3 | namespace motoi
4 | {
5 |
6 | uint eucjp_validator::is_valid(byte_t const * data) const
7 | {
8 |
9 | u8 c_hi {*data};
10 |
11 | // ASCII except control characters (including tab and conditionally newlines)
12 | if ((c_hi == 0x09) || ((c_hi >= 0x20) & (c_hi <= 0x7e)) || (m_include_crlf && (c_hi == 0x0a)))
13 | return 1;
14 |
15 | u8 c_lo {*(data + 1)};
16 |
17 | if (m_include_crlf && ((c_hi == 0x0d) && (c_lo == 0x0a)))
18 | return 2;
19 |
20 | /*
21 | JIS X 0201
22 | the raw 0201 code prefixed with 0x8E
23 | */
24 | if (! m_skip_jis0201 && (c_hi == 0x8e))
25 | {
26 | if ((c_lo >= 0xa1) & (c_lo <= 0xdf))
27 | return 2;
28 | }
29 | /*
30 | JIS X 0208
31 | standard 0208 code with top bit set
32 | Partial ku
33 | 0xa2 - 0xa1 to 0xae, 0xba to 0xc1, 0xca to 0xd0, 0xdc to 0xea, 0xf2 to 0xf9, 0xfe
34 | 0xa3 - 0xb0 to 0xb9, 0xc1 to 0xda, 0xe1 to 0xfa
35 | 0xa4 - 0xa1 to 0xf3
36 | 0xa5 - 0xa1 to 0xf6
37 | 0xa6 - 0xa1 to 0xb8, 0xc1 to 0xd8
38 | 0xa7 - 0xa1 to 0xc1, 0xd1 to 0xf1
39 | 0xa8 - 0xa1 to 0xc0
40 | 0xcf - 0xa1 to 0xd3
41 | 0xf4 - 0xa1 to 0xa6
42 | */
43 | switch (c_hi)
44 | {
45 | case 0xa2:
46 | if ((c_lo >= 0xa1) & (c_lo <= 0xae) || (c_lo >= 0xba) & (c_lo <= 0xc1) || (c_lo >= 0xca) & (c_lo <= 0xd0) ||
47 | (c_lo >= 0xdc) & (c_lo <= 0xea) || (c_lo >= 0xf2) & (c_lo <= 0xf9) || (c_lo == 0xfe))
48 | return 2;
49 | return false;
50 | case 0xa3:
51 | if ((c_lo >= 0xb0) & (c_lo <= 0xb9) || (c_lo >= 0xc1) & (c_lo <= 0xda) || (c_lo >= 0xe1) & (c_lo <= 0xfa))
52 | return 2;
53 | return false;
54 | case 0xa4:
55 | if ((c_lo >= 0xa1) & (c_lo <= 0xf3))
56 | return 2;
57 | return false;
58 | case 0xa5:
59 | if ((c_lo >= 0xa1) & (c_lo <= 0xf6))
60 | return 2;
61 | return false;
62 | case 0xa6:
63 | if ((c_lo >= 0xa1) & (c_lo <= 0xb8) || (c_lo >= 0xc1) & (c_lo <= 0xd8))
64 | return 2;
65 | return false;
66 | case 0xa7:
67 | if ((c_lo >= 0xa1) & (c_lo <= 0xc1) || (c_lo >= 0xd1) & (c_lo <= 0xf1))
68 | return 2;
69 | return false;
70 | case 0xa8:
71 | if ((c_lo >= 0xa1) & (c_lo <= 0xc0))
72 | return 2;
73 | return false;
74 | case 0xcf:
75 | if ((c_lo >= 0xa1) & (c_lo <= 0xd3))
76 | return 2;
77 | return false;
78 | case 0xf4:
79 | if ((c_lo >= 0xa1) & (c_lo <= 0xa6))
80 | return 2;
81 | return false;
82 | }
83 |
84 | /*
85 | Full ku (lo bytes 0xa1 to 0xfe)
86 | 0xa1, 0xb0 to 0xce, 0xd0 to 0xf3
87 | */
88 | if (((c_hi == 0xa1) || ((c_hi >= 0xb0) & (c_hi <= 0xce)) || ((c_hi >= 0xd0) & (c_hi <= 0xf3))) &&
89 | ((c_lo >= 0xa1) & (c_lo <= 0xfe)))
90 | return 2;
91 |
92 | /*
93 | JIS X 0212
94 | 0208 extension, so only a few ku are present
95 | 0208 code prefixed by 0x8f
96 | */
97 | if (c_hi == 0x8f)
98 | {
99 | u8 c_md = c_lo;
100 | c_lo = *(data + 2);
101 |
102 | /*
103 | Partial ku:
104 | 0xa2 - 0xaf to 0xb9, 0xc2 to 0xc4, 0xeb to 0xf1
105 | 0xa6 - 0xe1 to 0xe5, 0xe7, 0xe9, 0xea, 0xec, 0xf1 to 0xfc
106 | 0xa7 - 0xc2 to 0xce, 0xf2 to 0xfe
107 | 0xa9 - 0xa1, 0xa2, 0xa4, 0xa6, 0xa8, 0xa9, 0xab to 0xad, 0xaf, 0xb0, 0xc1 to 0xd0
108 | 0xaa - 0xa1 to 0xb8, 0xba to 0xf7
109 | 0xab - 0xa1 to 0xbb, 0xbd to 0xc3, 0xc5 to 0xf7
110 | 0xed - 0xa1 to 0xe3
111 | */
112 | switch (c_md)
113 | {
114 | case 0xa2:
115 | if (((c_lo >= 0xaf) & (c_lo <= 0xb9)) || ((c_lo >= 0xc2) & (c_lo <= 0xc4)) || ((c_lo >= 0xeb) & (c_lo <= 0xf1)))
116 | return 3;
117 | return false;
118 | case 0xa6:
119 | if (((c_lo >= 0xe1) & (c_lo <= 0xe5)) || (c_lo == 0xe7) || (c_lo == 0xe9) || (c_lo == 0xea) || (c_lo == 0xec) ||
120 | ((c_lo >= 0xf1) & (c_lo <= 0xfc)))
121 | return 3;
122 | return false;
123 | case 0xa7:
124 | if (((c_lo >= 0xc2) & (c_lo <= 0xce)) || ((c_lo >= 0xf2) & (c_lo <= 0xfe)))
125 | return 3;
126 | return false;
127 | case 0xa9:
128 | if ((c_lo == 0xa1) || (c_lo == 0xa2) || (c_lo == 0xa4) || (c_lo == 0xa6) || (c_lo == 0xa8) || (c_lo == 0xa9) ||
129 | ((c_lo >= 0xab) & (c_lo <= 0xad)) || (c_lo == 0xaf) || (c_lo == 0xb0) || ((c_lo >= 0xc1) & (c_lo <= 0xd0)))
130 | return 3;
131 | return false;
132 | case 0xaa:
133 | if (((c_lo >= 0xa1) & (c_lo <= 0xb8)) || ((c_lo >= 0xba) & (c_lo <= 0xf7)))
134 | return 3;
135 | return false;
136 | case 0xab:
137 | if (((c_lo >= 0xa1) & (c_lo <= 0xbb)) || ((c_lo >= 0xbd) & (c_lo <= 0xc3)) || ((c_lo >= 0xc5) & (c_lo <= 0xf7)))
138 | return 3;
139 | return false;
140 | case 0xed:
141 | if ((c_lo >= 0xa1) & (c_lo <= 0xe3))
142 | return 3;
143 | return false;
144 | }
145 |
146 | // Full ku (lo bytes 0xa1 to 0xfe)
147 | // 0xb0 to 0xec
148 | if (((c_md >= 0xb0) & (c_md <= 0xec)) && ((c_lo >= 0xa1) & (c_lo <= 0xfe)))
149 | return 3;
150 | }
151 | return false;
152 | }
153 |
154 | } // namespace motoi
155 |
--------------------------------------------------------------------------------
/src/enc_shiftjis.cpp:
--------------------------------------------------------------------------------
1 | #include "enc_shiftjis.hpp"
2 |
3 | namespace motoi
4 | {
5 |
6 | uint shiftjis_validator::is_valid(byte_t const * data) const
7 | {
8 | /*
9 | JIS X 0201 - 8-bit characters (including 7-bit ASCII)
10 | excludes non-printable (control code) and reserved bytes
11 | (but include tab (0x09))
12 | */
13 | u8 c_hi {*data};
14 | // clang-format off
15 | if ((c_hi == 0x09) || (c_hi >= 0x20) & (c_hi <= 0x7e)
16 | || ((! m_skip_jis0201) && ((c_hi >= 0xa1) & (c_hi <= 0xdf)))
17 | )
18 | return 1;
19 | // clang-format on
20 |
21 | // JIS X 0208 - 16 bit characters
22 | u8 c_lo {*(data + 1)};
23 |
24 | if (m_include_crlf && (c_hi == 0x0a || (c_hi == 0x0d && c_lo == 0x0a)))
25 | return 1;
26 |
27 | // sjis lower byte can never be these values
28 | if ((c_lo >= 0x0) & (c_lo <= 0x3f) || (c_lo == 0x7f) || (c_lo >= 0xfd) & (c_lo <= 0xff))
29 | return 0;
30 |
31 | /*
32 | Partial fields (always excluding 0x7f)
33 | 0x81 - 0x40 to 0xac, 0xb8 to 0xbf, 0xc8 to 0xce, 0xda to 0xe8, 0xf0 to
34 | 0xf7, 0xfc 0x82 - 0x4f to 0x58, 0x60 to 0x79, 0x81 to 0x9a, 0x9f to 0xf1
35 | 0x83 - 0x40 to 0x96, 0x9f to 0xb6, 0xbf to 0xd6
36 | 0x84 - 0x40 to 0x60, 0x70 to 0x91, 0x9f to 0xbe
37 | 0x88 - 0x9f to 0xfc
38 | 0x98 - 0x40 to 0x72, 0x9f to 0xfc
39 | 0xea - 0x40 to 0xa4
40 | */
41 | switch (c_hi)
42 | {
43 | case 0x81:
44 | if ((c_lo >= 0x40) & (c_lo <= 0xac) || (c_lo >= 0xb8) & (c_lo <= 0xbf) || (c_lo >= 0xc8) & (c_lo <= 0xce) ||
45 | (c_lo >= 0xda) & (c_lo <= 0xe8) || (c_lo >= 0xf0) & (c_lo <= 0xf7) || (c_lo == 0xfc))
46 | return 2;
47 | return 0;
48 | case 0x82:
49 | if ((c_lo >= 0x4f) & (c_lo <= 0x58) || (c_lo >= 0x60) & (c_lo <= 0x79) || (c_lo >= 0x81) & (c_lo <= 0x9a) ||
50 | (c_lo >= 0x9f) & (c_lo <= 0xf1))
51 | return 2;
52 | return 0;
53 | case 0x83:
54 | if ((c_lo >= 0x40) & (c_lo <= 0x96) || (c_lo >= 0x9f) & (c_lo <= 0xb6) || (c_lo >= 0xbf) & (c_lo <= 0xd6))
55 | return 2;
56 | return 0;
57 | case 0x84:
58 | if ((c_lo >= 0x40) & (c_lo <= 0x60) || (c_lo >= 0x70) & (c_lo <= 0x91) || (c_lo >= 0x9f) & (c_lo <= 0xbe))
59 | return 2;
60 | return 0;
61 | case 0x88:
62 | if ((c_lo >= 0x9f) & (c_lo <= 0xfc))
63 | return 2;
64 | return 0;
65 | case 0x98:
66 | if ((c_lo >= 0x40) & (c_lo <= 0x72) || (c_lo >= 0x9f) & (c_lo <= 0xfc))
67 | return 2;
68 | return 0;
69 | case 0xea:
70 | if ((c_lo >= 0x40) & (c_lo <= 0xa4))
71 | return 2;
72 | return 0;
73 | }
74 |
75 | /*
76 | Full fields (0x40 to 0xfc, excluding 0x7f)
77 | 0x89 to 0x97, 0x99 to 0x9f, 0xe0 to 0xe9
78 | */
79 | // clang-format off
80 | if (
81 | (((c_hi >= 0x89) & (c_hi <= 0x97))
82 | || ((c_hi >= 0x99) & (c_hi <= 0x9f))
83 | || ((c_hi >= 0xe0) & (c_hi <= 0xe9))
84 | )
85 | && ((c_lo >= 0x40) & (c_lo <= 0xfc))
86 | )
87 | return 2;
88 | // clang-format on
89 | return 0;
90 | }
91 |
92 | } // namespace motoi
93 |
--------------------------------------------------------------------------------
/src/jstrings.cpp:
--------------------------------------------------------------------------------
1 |
2 | #include "jstrings.hpp"
3 | #include "enc_validator.hpp"
4 | #include
5 | #include
6 | #include
7 |
8 | namespace motoi
9 | {
10 |
11 | using namespace std;
12 |
13 | vector find(istream & is, encoding_validator const & enc, size_t const minimum_match_length)
14 | {
15 |
16 | vector out;
17 |
18 | // work buffer - input file is read/procesed in chunks of buffer_size bytes
19 | uptr buffer(new byte_t[buffer_size]);
20 | byte_t * p_buffer = buffer.get();
21 |
22 | /*
23 | Pointer to the end of the data within the buffer
24 | This will be (buffer + buffer_size) most of the time except for the
25 | final read of the stream where there will likely be less than a full
26 | buffer worth of data
27 | */
28 | byte_t * p_buffer_end {nullptr};
29 |
30 | // the current byte(s) we are examining within the buffer
31 | byte_t * p_curr {nullptr};
32 |
33 | /*
34 | Due to the variable length nature of bytes, we set a "boundary"
35 | near the end of the buffer, at:
36 | (p_buffer_end - enc_validator.max_sequence_length)
37 | This prevents false negatives when a multibyte character is split
38 | across buffer reads.
39 | Once the read pointer is past the boundary, the remaining,
40 | unprocessed bytes are moved to the front of the buffer
41 | and the buffer is filled with the next read from the stream
42 | less the bytes moved to the front.
43 | */
44 | byte_t * p_boundary {nullptr};
45 |
46 | // Track our location within the source data
47 | size_t source_offset {0};
48 |
49 | /*
50 | The address of the start of a valid string in terms of
51 | the *source data* rather than the buffer
52 | */
53 | size_t valid_start_offset {0};
54 |
55 | /*
56 | Start and end pointers for a string of valid data with
57 | the buffer
58 | */
59 | byte_t * p_valid_start {nullptr};
60 | byte_t * p_valid_end {nullptr};
61 |
62 | /*
63 | If p_valid_start is set and we have not yet determined the
64 | end of the string, but we have reached the boundary and
65 | are reloading the buffer, the valid data found so far needs
66 | to be perserved in a secondary buffer. In this case, we
67 | go ahead and allocate a new found_string with the data so
68 | far and set the flag below, which will indicate that the
69 | remaining data should be appended to the last found_string
70 | in the list.
71 | */
72 | bool crossed_boundary {false};
73 |
74 | size_t valid_string_length {0};
75 | size_t advance_read_pointer_count {0};
76 | int remaining_count {0};
77 |
78 | while (is.good())
79 | {
80 | // prepare buffer
81 | if (p_valid_start != nullptr)
82 | {
83 | // we have a valid string in progress
84 | out.push_back({valid_start_offset, {p_valid_start, p_curr}});
85 | crossed_boundary = true;
86 | }
87 |
88 | // always clear the buffer since we don't know how many bytes
89 | // will actually be read
90 | memset((char *) (p_buffer + remaining_count), 0, (buffer_size - remaining_count));
91 | is.read((char *) (p_buffer + remaining_count), (buffer_size - remaining_count));
92 | streamsize buffer_read_size = is.gcount() + remaining_count;
93 |
94 | p_buffer_end = p_buffer + buffer_read_size;
95 | p_curr = p_buffer;
96 |
97 | /*
98 | if we're on the final buffer read (is.eof()) or if the data we read is smaller
99 | than the max sequence length of the encoding, then don't bother setting a boundary
100 | */
101 | if ((buffer_read_size < enc.max_seq_len()) || is.eof())
102 | p_boundary = p_buffer + buffer_read_size;
103 | else
104 | p_boundary = p_buffer + (buffer_read_size - enc.max_seq_len());
105 |
106 | while (p_curr < p_boundary)
107 | {
108 | auto valid_byte_count {enc(p_curr)};
109 | if (valid_byte_count > 0)
110 | {
111 | // found a valid sequence
112 | if (p_valid_start == nullptr)
113 | {
114 | // starting a new valid string
115 | p_valid_start = p_curr;
116 | valid_start_offset = source_offset;
117 | }
118 |
119 | ++valid_string_length;
120 | // move to the next byte after this valid grouping
121 | advance_read_pointer_count = valid_byte_count;
122 | }
123 | else
124 | {
125 | // hit an invalid byte
126 | if (p_valid_start != nullptr)
127 | {
128 | if (valid_string_length >= minimum_match_length)
129 | {
130 | // valid_start was set, so we're finishing a valid string
131 | p_valid_end = p_curr;
132 | if (crossed_boundary)
133 | {
134 | out.back().second.insert(out.back().second.end(), p_buffer, p_valid_end);
135 | }
136 | else
137 | {
138 | // clang-format off
139 | out.push_back ({
140 | valid_start_offset,
141 | {p_valid_start, p_valid_end}
142 | });
143 | // clang-format on
144 | }
145 |
146 | if (out.size() >= out.max_size())
147 | return out;
148 | }
149 |
150 | if (crossed_boundary)
151 | crossed_boundary = false;
152 |
153 | p_valid_start = nullptr;
154 | p_valid_end = nullptr;
155 | }
156 |
157 | valid_string_length = 0;
158 |
159 | // move to next byte as this one was invalid
160 | advance_read_pointer_count = 1;
161 | }
162 |
163 | // increase pointer and offset
164 | p_curr += advance_read_pointer_count;
165 | source_offset += advance_read_pointer_count;
166 | }
167 |
168 | remaining_count = p_buffer_end - p_curr;
169 | if (remaining_count > 0)
170 | copy(p_curr, p_buffer_end, p_buffer);
171 | }
172 |
173 | // finished processing all input
174 | // check if we have an unfinished valid string
175 | if ((p_valid_start != nullptr) && (valid_string_length >= minimum_match_length))
176 | {
177 | // clang-format off
178 | out.push_back ({
179 | valid_start_offset,
180 | {p_valid_start, p_curr}
181 | });
182 | // clang-format on
183 | }
184 |
185 | return out;
186 | }
187 | } // namespace motoi
--------------------------------------------------------------------------------
/src/main.cpp:
--------------------------------------------------------------------------------
1 | #include "enc_cp932.hpp"
2 | #include "enc_eucjp.hpp"
3 | #include "enc_shiftjis.hpp"
4 | #include "fstreams.hpp"
5 | #include "jstrings.hpp"
6 | #include "uniconv.hpp"
7 | #include "usage.hpp"
8 | #include
9 | #include
10 | #include
11 | #include
12 | #include
13 | #include
14 | #include
15 | #include
16 |
17 | #ifdef DEBUG
18 | #include
19 | #endif
20 |
21 | using namespace std;
22 | using namespace motoi;
23 |
24 | static char constexpr CUTOFF_INDICATOR[] {"..."};
25 |
26 | struct runtime_config_jstrings
27 | {
28 | public:
29 | string input_path;
30 | string encoding {"shiftjis"};
31 | size_t match_length {default_match_length};
32 | size_t cutoff {0};
33 | bool multiline {false};
34 | bool raw {false};
35 | bool skip_jis0201 {false};
36 | } cfg;
37 |
38 | void process_args(int argc, char ** argv);
39 |
40 | int main(int argc, char ** argv)
41 | {
42 | #ifdef DEBUG
43 | chrono::high_resolution_clock::time_point t1 = chrono::high_resolution_clock::now();
44 | #endif
45 | istream * indata {nullptr};
46 | ifstream infile;
47 |
48 | shiftjis_validator shiftjis_valid;
49 | cp932_validator cp932_valid;
50 | eucjp_validator eucjp_valid;
51 |
52 | // clang-format off
53 | unordered_map validators
54 | {
55 | {"shiftjis", &shiftjis_valid}, {"shift-jis", &shiftjis_valid}, {"sjis", &shiftjis_valid},
56 | {"cp932", &cp932_valid}, {"windows932", &cp932_valid}, {"windows31j", &cp932_valid},
57 | {"eucjp", &cp932_valid}, {"euc-jp", &cp932_valid}
58 | };
59 | // clang-format on
60 |
61 | if (validators.count(cfg.encoding) == 0)
62 | {
63 | throw invalid_argument("Invalid encoding specified");
64 | }
65 |
66 | try
67 | {
68 | process_args(argc, argv);
69 |
70 | if (cfg.input_path.empty())
71 | indata = &cin;
72 | else
73 | {
74 | try
75 | {
76 | infile = ifstream_checked(cfg.input_path);
77 | }
78 | catch (system_error const & sys_ex)
79 | {
80 | cerr << sys_ex.what() << ": " << sys_ex.code().message() << endl;
81 | return -1;
82 | }
83 | indata = &infile;
84 | }
85 |
86 | #ifdef DEBUG
87 | chrono::high_resolution_clock::time_point t2 = chrono::high_resolution_clock::now();
88 | auto duration = chrono::duration_cast(t2 - t1).count();
89 |
90 | cerr << "SETUP: " << duration << "ms" << endl;
91 | #endif
92 |
93 | #ifdef DEBUG
94 | t1 = chrono::high_resolution_clock::now();
95 | #endif
96 |
97 | jis_validator * validator {validators[cfg.encoding]};
98 | validator->include_crlf(cfg.multiline);
99 | validator->skip_jis0201(cfg.skip_jis0201);
100 | uniconv conv(validator->iconv_code());
101 |
102 | auto found_strings = find(*indata, *validator, cfg.match_length);
103 | #ifdef DEBUG
104 | t2 = chrono::high_resolution_clock::now();
105 | duration = chrono::duration_cast(t2 - t1).count();
106 |
107 | cerr << "VALIDATION & COPY: " << duration << "ms" << endl;
108 | #endif
109 |
110 | #ifdef DEBUG
111 | t1 = chrono::high_resolution_clock::now();
112 | #endif
113 | cout << showbase << internal << setfill('0') << hex;
114 |
115 | size_t counter = 0;
116 | string as_utf8;
117 | for (auto & this_string : found_strings)
118 | {
119 | cout << setw(10) << this_string.first << ' ';
120 | if (cfg.raw)
121 | {
122 | copy(this_string.second.data(),
123 | this_string.second.data() + this_string.second.size(),
124 | ostream_iterator(cout));
125 | }
126 | else
127 | {
128 | as_utf8 = conv.convert(this_string.second);
129 | if (cfg.cutoff != 0 && as_utf8.size() > cfg.cutoff)
130 | cout << string({as_utf8.begin(), as_utf8.begin() + cfg.cutoff}) << CUTOFF_INDICATOR;
131 | else
132 | cout << as_utf8;
133 | }
134 | cout << endl;
135 | ++counter;
136 | }
137 | #ifdef DEBUG
138 | t2 = chrono::high_resolution_clock::now();
139 | duration = chrono::duration_cast(t2 - t1).count();
140 |
141 | cerr << "OUTPUT: " << duration << "ms" << endl;
142 | #endif
143 | }
144 | catch (exception const & e)
145 | {
146 | cout << "Uncaught Exception: " << e.what() << endl;
147 | }
148 | }
149 |
150 | void process_args(int argc, char ** argv)
151 | {
152 | // clang-format off
153 | string const short_opts {":l:c:e:mrsh"};
154 | vector const long_opts {
155 | {"match-length", required_argument, nullptr, 'l'},
156 | {"cutoff", required_argument, nullptr, 'c'},
157 | {"encoding", required_argument, nullptr, 'e'},
158 | {"multiline", no_argument, nullptr, 'm'},
159 | {"raw", no_argument, nullptr, 'r'},
160 | {"skip-jis0201", no_argument, nullptr, 's'},
161 | {"help", no_argument, nullptr, 'h'},
162 | {nullptr, 0, nullptr, 0}
163 | };
164 |
165 | vector const opt_details
166 | {
167 | {false, "Specify number of sequential characters required to qualify as a string ", nullptr},
168 | {false, "Specify maximum number of characters to display in a single string", nullptr},
169 | {false, "Specify text encoding to use", "shiftjis|cp932|eucjp"},
170 | {false, "Do not split multiline strings", nullptr},
171 | {false, "Output the data in its original encoding without converting to unicode", nullptr},
172 | {false, "Skip JIS 0201 (8-bit) bytes", nullptr},
173 | {false, "Display usage", nullptr}
174 | };
175 | // clang-format on
176 |
177 | while (true)
178 | {
179 | const auto this_opt = getopt_long(argc, argv, short_opts.data(), long_opts.data(), nullptr);
180 |
181 | if (this_opt == -1)
182 | break;
183 |
184 | switch (this_opt)
185 | {
186 | case 'l':
187 | cfg.match_length = strtoul(optarg, nullptr, 10);
188 | break;
189 | case 'c':
190 | cfg.cutoff = strtoul(optarg, nullptr, 10);
191 | break;
192 | case 'e':
193 | cfg.encoding = optarg;
194 | break;
195 | case 'm':
196 | cfg.multiline = true;
197 | break;
198 | case 'r':
199 | cfg.raw = true;
200 | break;
201 | case 's':
202 | cfg.skip_jis0201 = true;
203 | break;
204 | case 'h':
205 | show_usage(long_opts.data(), opt_details.data(), cout);
206 | exit(0);
207 | break;
208 | case ':':
209 | cerr << "Missing argument" << endl;
210 | exit(1);
211 | break;
212 | case '?':
213 | cerr << "Invalid option" << endl;
214 | exit(1);
215 | break;
216 | default:
217 | show_usage(long_opts.data(), opt_details.data(), cout);
218 | exit(1);
219 | break;
220 | }
221 | }
222 |
223 | if (optind < argc)
224 | {
225 | // only read the first non-option argument, assuming it is input filename
226 | cfg.input_path = argv[optind];
227 | }
228 |
229 | // sanity checking
230 | if (cfg.match_length < 1)
231 | throw invalid_argument("Match length must be a positive value");
232 | }
233 |
--------------------------------------------------------------------------------
/src/usage.cpp:
--------------------------------------------------------------------------------
1 | /**
2 | * @file usage.cpp
3 | * @author Motoi Productions (Damian Rogers damian@motoi.pro)
4 | * @brief Display program options/usage
5 | *
6 | * Updates:
7 | * 20220415 Initial
8 | * 20220722 Using const in show_usage parameters
9 | * 20220914 Added show_version
10 | * 20221118 Added copyright field; change to non-wide characters to properly
11 | * support UTF-8; updates to match changes to match app.hpp.cfg
12 | */
13 |
14 | #include "usage.hpp"
15 | #include "app.hpp"
16 | #include
17 | #include
18 | #include
19 |
20 | namespace motoi {
21 |
22 | using namespace std;
23 |
24 | void show_version(ostream &output) {
25 | ostringstream ss;
26 | ss << app::name << " - version " << app::version << endl;
27 | ss << app::copyright << endl;
28 | ss << app::contact << " / " << app::website << endl;
29 |
30 | output << ss.str();
31 | }
32 |
33 | void show_usage(option const *opts, option_details const *details,
34 | ostream &output) {
35 | setlocale(LC_ALL, "");
36 |
37 | show_version(output);
38 |
39 | ostringstream ss;
40 | ss << endl << "Usage:" << endl;
41 |
42 | while (true) {
43 | if (opts->name == nullptr)
44 | break;
45 |
46 | ss << " --" << opts->name << ", -" << (char)opts->val;
47 | if (opts->has_arg == required_argument) {
48 | if (details->arg_type != nullptr)
49 | ss << " <" << details->arg_type << ">";
50 | else
51 | ss << " ";
52 | }
53 | if (opts->has_arg == optional_argument) {
54 | if (details->arg_type != nullptr)
55 | ss << " arg_type << ">";
56 | else
57 | ss << " ";
58 | }
59 | ss << endl;
60 | ss << " ";
61 | if (details->required)
62 | ss << "[Required] ";
63 |
64 | ss << details->desc << endl;
65 |
66 | ++opts;
67 | ++details;
68 | }
69 |
70 | output << ss.str();
71 | }
72 | } // namespace motoi
73 |
--------------------------------------------------------------------------------