├── LICENSE.txt
├── README.md
├── c
    ├── unibinary.xcodeproj
    │   ├── project.pbxproj
    │   ├── project.xcworkspace
    │   │   ├── contents.xcworkspacedata
    │   │   ├── xcshareddata
    │   │   │   └── unibinary.xccheckout
    │   │   └── xcuserdata
    │   │   │   └── nst.xcuserdatad
    │   │   │       ├── UserInterfaceState.xcuserstate
    │   │   │       └── WorkspaceSettings.xcsettings
    │   └── xcuserdata
    │   │   └── nst.xcuserdatad
    │   │       ├── xcdebugger
    │   │           └── Breakpoints_v2.xcbkptlist
    │   │       └── xcschemes
    │   │           ├── tests.xcscheme
    │   │           ├── unibinary.xcscheme
    │   │           └── xcschememanagement.plist
    └── unibinary
    │   ├── Makefile
    │   ├── main.c
    │   ├── tests.c
    │   ├── unibinary.1
    │   ├── unibinary.c
    │   └── unibinary.h
├── javascript
    ├── test
    │   ├── template.css
    │   ├── test.html
    │   ├── test.js
    │   ├── typedarray.js
    │   ├── wru.console.js
    │   └── wru.min.js
    ├── unibinary.js
    └── unibinary_tool.js
└── python
    ├── ub_profile.py
    ├── ub_test.py
    └── unibinary.py


/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2016 Nicolas Seriot
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
23 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # UniBinary
  2 | 
  3 | _Encodes data into printable Unicode characters._
  4 | 
  5 | ### What is UniBinary
  6 | 
  7 | UniBinary is an encoding algorithm which packs arbitrary data into printable Unicode characters.
  8 | 
  9 | It can be used to send data through media such as Twitter which don't allow binary data but allow Unicode characters.
 10 | 
 11 | UniBinary is akin to Base64 but uses far fewer characters.
 12 | 
 13 | UniBinary comes with three parts:
 14 | 
 15 | - this documentation,
 16 | - a Python implementation,
 17 | - a C implementation.
 18 | 
 19 | ### Python Implementation
 20 | 
 21 | Encode a binary file into a UTF-8 text file:
 22 | 
 23 |     $ python unibinary.py -e /bin/date > /tmp/date.txt
 24 |     $ file /tmp/date.txt 
 25 |     /tmp/date.txt: UTF-8 Unicode text, with very long lines, with no line terminators
 26 | 
 27 | Decode a UTF-8 text file into a binary file:
 28 |     
 29 |     $ python unibinary.py -d /tmp/date.txt > /tmp/date
 30 |     $ file /tmp/date
 31 |     /tmp/date: Mach-O 64-bit executable x86_64
 32 | 
 33 | It works!
 34 |     
 35 |     $ chmod +x /tmp/date
 36 |     $ /tmp/date
 37 |     Thu Jan 17 18:02:24 CET 2013
 38 | 
 39 | Inline string encoding:
 40 | 
 41 |     $ python unibinary.py -es "test"
 42 |     鬥髴
 43 | 
 44 | Inline string decoding:
 45 | 
 46 |     $ python unibinary.py -ds "嫯壭巠唀帀廀帀庀帀庀帀嚀一币帀币帀常帀済靬餯瘷駲餤悀巿巿Ѐ丅戀Ѐ丅榀帀乿巿巰叿巿崀帀丏巿巿崅帀渐帀币帀帐帀丏崀巿嵪焨最帀袁夀劃峀勍嘈凄爪与夑巰一帀ӿ丅丏巿蠀帀夀侃峀勍嘏巿巿巿帀巿崀丏巿" > micro_macho
 47 |     $ chmod +x micro_macho
 48 |     $ ./micro_macho
 49 |     Hello world
 50 | 
 51 | ### C Implementation
 52 | 
 53 | Compile it with `make`:
 54 | 
 55 | 	$ make
 56 | 	$ make tests
 57 | 
 58 | Run the unit tests:
 59 | 
 60 | 	$ ./tests
 61 | 	...
 62 | 	-- ALL TESTS ARE OK --
 63 | 
 64 | Run the main executable:
 65 | 
 66 | 	$ ./unibinary
 67 | 	Usage: unibinary [-ed] [-sf] [-b num] [-h]
 68 | 
 69 | 	UniBinary encodes and decodes data into printable Unicode characters.
 70 | 
 71 | 	  -e, --encode
 72 | 	  -d, --decode
 73 | 	  -s, --string    to be encoded or decoded
 74 | 	  -f, --filepath  to be encoded or decoded
 75 | 	  -b, --break     break encoded string into num characters lines
 76 | 	  -h, --help      show this help message and exit
 77 | 
 78 | Encode a file, break output in lines of 16 characters:
 79 | 
 80 | 	$ unibinary -b 16 -ef micro_macho 
 81 | 	嫯壭巠唀帀廀帀庀帀庀帀嚀一币Ѐ七
 82 | 	幀帀氀帀逥餬觠鯯骬蜊丏巿巰一Ѐ七
 83 | 	戀Ѐ丅榀帀乿巿巰叿巿崀Ѐ七ӿ丄彀
 84 | 	帀舀帀幀帀戀帀巰仿巶堌蠤Ѐ七袁夀
 85 | 	劃峀勍嘈凄爪与夑巰一帀ӿ丅丏巿蠀
 86 | 	帀夀侃峀勍嘏巿ӿ七帀巿崀丏巿
 87 | 
 88 | Encode stdin and decode the output:
 89 | 
 90 | 	$ echo "test" | unibinary -e | unibinary -d
 91 | 	test
 92 | 
 93 | API (`unibinary.h`)
 94 | 
 95 | 	// encode
 96 | 	int unibinary_encode(FILE *fd_in, FILE *fd_out, size_t wrap_length);
 97 | 	int unibinary_encode_string(const char* src, wchar_t **dst, size_t wrap_length);
 98 | 
 99 | 	// decode
100 | 	int unibinary_decode(FILE *src, FILE *dst);
101 | 	int unibinary_decode_string(const wchar_t *src, char **dst, long *dst_len);
102 | 
103 | Encoding and decoding are efficient and time (worst case) is linear with input size.
104 | 	
105 | In the following example, 10 times the data take 10 times more time to encode or decode.
106 | 
107 | 	# generate 50 MB of random data
108 | 	$ dd if=/dev/urandom bs=1k count=$((1024*50)) > /tmp/50
109 | 	$ shasum /tmp/50
110 | 	ca8554834cb036a6f7caf449f771573f82ef8b26  /tmp/50
111 | 	$ time unibinary -ef /tmp/50 > /tmp/50.txt
112 | 	user	0m8.062s
113 | 	$ time unibinary -df /tmp/50.txt > /tmp/50_decoded
114 | 	user	0m6.712s
115 | 	$ shasum /tmp/50_decoded 
116 | 	ca8554834cb036a6f7caf449f771573f82ef8b26  /tmp/50_decoded
117 | 	
118 | 	# generate 500 MB of random data
119 | 	$ dd if=/dev/urandom bs=1k count=$((1024*500)) > /tmp/500
120 | 	$ shasum /tmp/500
121 | 	a69bacfbe3999a817cab9608d14f463fce9b2cd7  /tmp/500
122 | 	$ time unibinary -ef /tmp/500 > /tmp/500.txt
    | 	user	1m20.879s
123 | 	$ time unibinary -df /tmp/500.txt > /tmp/500_decoded
124 | 	user	1m7.764s
125 | 	$ shasum /tmp/500_decoded
126 | 	a69bacfbe3999a817cab9608d14f463fce9b2cd7  /tmp/500_decoded
127 | 
128 | ### Encoded Text Size
129 | 
130 | UniBinary can store 3 arbitrary bytes or 4 ASCII 7-bits characters into 2 Unicode characters.
131 | 
132 | You can compare UniBinary with Base64, which stores 3 bytes into 4 ASCII characters:
133 | 
134 |             | UniBinary (Unicode) | Base64 (ASCII)
135 |     --------+---------------------+----------------
136 |      6 bits |                     | 1 character
137 |     12 bits | 1 character         | 
138 |     2 ASCII | 1 character         | 
139 |     3 bytes | 2 characters        | 4 characters
140 |     6 ASCII | 3 characters        | 8 characters
141 | 
142 | The worst case of encoding `N` bytes is `(N * 2 / 3 + 2)` Unicode characters.
143 | 
144 | `C` Unicode characters can store at least `(C - C % 3) * 3 / 2 + (C % 3)` bytes.
145 | 
146 | Hence, UniBinary can pack at least 209 bytes in 140 characters.
147 | 
148 | In case of a text only made out of `N` ASCII 7-bits characters, the worst case is `N / 2 + 1` Unicode characters.
149 | 
150 | Also, any repeated sequence of characters will be compressed with a [run-length encoding](http://en.wikipedia.org/wiki/Run-length_encoding).
151 | 
152 | ### Format Description
153 | 
154 | #### 1. Storing Data into Unicode Code Points
155 | 
156 | UniBinary packs data into three ranges of Unicode characters, named `U8`, `U12a` and `U12b`.
157 | 
158 | A character in `U8` stores an 8-bit value, a character in `U12a` or `U12b` stores a 12-bit value.
159 | 
160 |     U8   = [ \u0400, ..., \u0400 + 0x100 [
161 | 
162 |     U12b = [ \u4E00, ..., \u4E00 + 0x1000 [
163 | 
164 |     U12a_0_0 = [ \u5E00, ..., \u5E00 + 0x1000 [
165 |     U12a_0_1 = [ \u6E00, ..., \u6E00 + 0x1000 [
166 |     U12a_1_0 = [ \u7E00, ..., \u7E00 + 0x1000 [
167 |     U12a_1_1 = [ \u8E00, ..., \u8E00 + 0x1000 [
168 | 
169 | `U8` is actually the "Cyrillic" block, while `U12a` and `U12b` are subsets of the "CJK Unified Ideographs" block.
170 | 
171 | `U8` and `U12b` store arbitrary 8 and 12 bits sequences, while the `U12a` blocks are used to store ASCII 7-bits characters.
172 | 
173 | The offset in the range represents the bits to be encoded.
174 | 
175 |     0xAB  (8 bits)  gets encoded as \u0400 + 0xAB  = \u04AB = ҫ
176 |     0xABC (12 bits) gets encoded as \u4E00 + 0xABC = \u58BC = 墼
177 | 
178 | #### 2. Mapping Arbitrary Bytes into Unicode
179 | 
180 | UniBinary reads three bytes to yield two Unicode characters in the `U12b` range.
181 | 
182 | Here is how UniBinary encodes the 24-bit value `0xABCDEF` into two Unicode characters, and how Base64 does it by comparison:
183 | 
184 |             UniBinary                |                 Base64
185 |                                      |
186 |     A   B   |C   D   |E   F          |        A   B   |C   D   |E   F   
187 |     10101011 11001101 11101111       |        10101011 11001101 11101111
188 |     [-----------][-----------]       |        [----][-----][-----][----]
189 |          ABC          DEF            |        101010 111100 110111 101111
190 |     \u4E00+0xABC \u4E00+0xDEF        |          42     60     55     47
191 |           墼           寯             |          q      8      3      v
192 | 
193 | If fewer than three bytes are available, UniBinary reads bytes one by one to yield Unicode characters in `U8`.
194 | 
195 |     A   B   
196 |     10101011
197 |     [------]
198 |        AB
199 |     \u0400+0xAB
200 |        ҫ
201 | 
202 | #### 3. Mapping ASCII 7-bits into one Unicode character
203 | 
204 | When UniBinary meets 2 ASCII 7-bits characters `a1` and `a2`, it encodes them into one single Unicode character. This character is chosen out of four possible ranges, depending on the value of the ASCII characters:
205 | 
206 |     U12a_0_0    [ \u5E00, ..., \u5E00 + 0x1000 [     for a1 <  64 and a2 <  64
207 |     U12a_0_1    [ \u6E00, ..., \u6E00 + 0x1000 [     for a1 <  64 and a2 >= 64
208 |     U12a_1_0    [ \u7E00, ..., \u7E00 + 0x1000 [     for a1 >= 64 and a2 <  64
209 |     U12a_1_1    [ \u8E00, ..., \u8E00 + 0x1000 [     for a1 >= 64 and a2 >= 64
210 | 
211 | So, we can pack 2 * 6 bits in a `U12a` Unicode character. We use four different ranges to replace the 7th (MSB) missing bit. We use `U12a_1_0` and `U12a_1_1` to add 64 to `a1`, and `U12a_0_1` and `U12a_1_1` to add 64 to `a2`. As a result, we can store any tuple of 2 ASCII 7-bits characters in a single Unicode character.
212 | 
213 | #### 4. Run Length Encoding 
214 | 
215 | UniBinary also takes advantage of repetitions to spare bytes. A byte `B` repeated more than 3 times gets encoded as `(u8, u12)` where `u8` stores `B` and `u12` stores the number of times that `B` is repeated in the `U12b` range.
216 | 
217 | #### 5. Format Summary
218 |     
219 |     - u8   u12b ->  byte B (u8) repeated N times (u12) | N in [3, 0xFFF]
220 |     - u12a      ->  12 bits (2 ASCII characters)
221 |     - u12b u12b ->  24 bits (3 bytes)
222 |     - u8        ->  8 bits (1 byte)
223 | 
224 | UniBinary encoded data can be described with the following regular expression:
225 | 
226 |     ( u12a | (u12b u12b) | (u8 u12b) )* u8{0,2}
227 |     
228 | Note that new lines (`\n`) can appear anywhere in the encoded text. The decoding algorithm simply ignores them.
229 | 
230 | #### 6. Examples
231 | 
232 |     0x12 0x34           -> encode 0x12 into U8, encode 0x34 into U8
233 |     0xAB 0xCD 0xEF      -> encode 0xABC into U12b, encode 0xDEF into U12b
234 |     0xFF 0xFF 0xFF 0xFF -> encode 0xFF into U8, encode 0x4 into U12b
235 | 
236 |     AB CD EF FF FF FF FF 00 -> U12(0xABC), U12(0xDEF), U8(0xFF), U12(4), U8(00) -> "墼寯ӿ丄Ѐ"
237 | 
238 |     13808 bytes /usr/bin/true -> 3253 Unicode characters, 9721 bytes UTF-8 file
239 | 
240 | ### Encoding Algorithm
241 | 
242 | First look for repetitions (no more than `0xFFF` at a time). If no repeat, then try to consume two ASCII chars. If it's not possible, look for three bytes. If less than three bytes are available, encode one byte at a time.
243 | 
244 |     1. byte B repeated N times | N >= 3   ->    U8(B), U12(N)
245 |     2. ASCII characters A1, A2            ->    U12a(A1, A2)
246 |     3. bytes B1, B2, B3                   ->    U12b((B1 << 4) + (B2 >> 4)), U12b(((B2 & 0xF) << 8) + B3)
247 |     4. byte B                             ->    U8(B)
248 | 
249 | ### Decoding Algorithm
250 | 
251 | For each Unicode character, use the range to know how to unmarshall data. Extract two ASCII characters out of `U12a`, or `N` times `B` out of `(U8, U12b)`, or three bytes out of `(U12b, U12b)`, or one byte out of `U8`.
252 | 
253 | See the source code for implementation details.
254 | 


--------------------------------------------------------------------------------
/c/unibinary.xcodeproj/project.pbxproj:
--------------------------------------------------------------------------------
  1 | // !$*UTF8*$!
  2 | {
  3 | 	archiveVersion = 1;
  4 | 	classes = {
  5 | 	};
  6 | 	objectVersion = 54;
  7 | 	objects = {
  8 | 
  9 | /* Begin PBXBuildFile section */
 10 | 		03495DA218707FF200D81680 /* main.c in Sources */ = {isa = PBXBuildFile; fileRef = 03495DA118707FF200D81680 /* main.c */; };
 11 | 		03495DA51870810D00D81680 /* unibinary.c in Sources */ = {isa = PBXBuildFile; fileRef = 03A3B0E61869ED890070BD43 /* unibinary.c */; };
 12 | 		03495DA61870810D00D81680 /* tests.c in Sources */ = {isa = PBXBuildFile; fileRef = 03E6BF9718707F9B001C339E /* tests.c */; };
 13 | 		03495DAA1870810D00D81680 /* unibinary.1 in CopyFiles */ = {isa = PBXBuildFile; fileRef = 03A3B0E81869ED890070BD43 /* unibinary.1 */; };
 14 | 		03A3B0E71869ED890070BD43 /* unibinary.c in Sources */ = {isa = PBXBuildFile; fileRef = 03A3B0E61869ED890070BD43 /* unibinary.c */; };
 15 | 		03A3B0E91869ED890070BD43 /* unibinary.1 in CopyFiles */ = {isa = PBXBuildFile; fileRef = 03A3B0E81869ED890070BD43 /* unibinary.1 */; };
 16 | /* End PBXBuildFile section */
 17 | 
 18 | /* Begin PBXContainerItemProxy section */
 19 | 		03E7F1C418A0E5CA0075AF4A /* PBXContainerItemProxy */ = {
 20 | 			isa = PBXContainerItemProxy;
 21 | 			containerPortal = 03A3B0DB1869ED890070BD43 /* Project object */;
 22 | 			proxyType = 1;
 23 | 			remoteGlobalIDString = 03A3B0E21869ED890070BD43;
 24 | 			remoteInfo = unibinary;
 25 | 		};
 26 | /* End PBXContainerItemProxy section */
 27 | 
 28 | /* Begin PBXCopyFilesBuildPhase section */
 29 | 		03495DA91870810D00D81680 /* CopyFiles */ = {
 30 | 			isa = PBXCopyFilesBuildPhase;
 31 | 			buildActionMask = 2147483647;
 32 | 			dstPath = /usr/share/man/man1/;
 33 | 			dstSubfolderSpec = 0;
 34 | 			files = (
 35 | 				03495DAA1870810D00D81680 /* unibinary.1 in CopyFiles */,
 36 | 			);
 37 | 			runOnlyForDeploymentPostprocessing = 1;
 38 | 		};
 39 | 		03A3B0E11869ED890070BD43 /* CopyFiles */ = {
 40 | 			isa = PBXCopyFilesBuildPhase;
 41 | 			buildActionMask = 2147483647;
 42 | 			dstPath = /usr/share/man/man1/;
 43 | 			dstSubfolderSpec = 0;
 44 | 			files = (
 45 | 				03A3B0E91869ED890070BD43 /* unibinary.1 in CopyFiles */,
 46 | 			);
 47 | 			runOnlyForDeploymentPostprocessing = 1;
 48 | 		};
 49 | /* End PBXCopyFilesBuildPhase section */
 50 | 
 51 | /* Begin PBXFileReference section */
 52 | 		03495DA018707FE100D81680 /* unibinary.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = unibinary.h; sourceTree = "<group>"; };
 53 | 		03495DA118707FF200D81680 /* main.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = main.c; sourceTree = "<group>"; };
 54 | 		03495DAE1870810D00D81680 /* tests */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = tests; sourceTree = BUILT_PRODUCTS_DIR; };
 55 | 		03A3B0E31869ED890070BD43 /* unibinary */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = unibinary; sourceTree = BUILT_PRODUCTS_DIR; };
 56 | 		03A3B0E61869ED890070BD43 /* unibinary.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = unibinary.c; sourceTree = "<group>"; };
 57 | 		03A3B0E81869ED890070BD43 /* unibinary.1 */ = {isa = PBXFileReference; lastKnownFileType = text.man; path = unibinary.1; sourceTree = "<group>"; };
 58 | 		03E6BF9718707F9B001C339E /* tests.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = tests.c; sourceTree = "<group>"; };
 59 | 		03E7F1C318A0E4D80075AF4A /* Makefile */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.make; name = Makefile; path = unibinary/Makefile; sourceTree = "<group>"; };
 60 | /* End PBXFileReference section */
 61 | 
 62 | /* Begin PBXFrameworksBuildPhase section */
 63 | 		03495DA81870810D00D81680 /* Frameworks */ = {
 64 | 			isa = PBXFrameworksBuildPhase;
 65 | 			buildActionMask = 2147483647;
 66 | 			files = (
 67 | 			);
 68 | 			runOnlyForDeploymentPostprocessing = 0;
 69 | 		};
 70 | 		03A3B0E01869ED890070BD43 /* Frameworks */ = {
 71 | 			isa = PBXFrameworksBuildPhase;
 72 | 			buildActionMask = 2147483647;
 73 | 			files = (
 74 | 			);
 75 | 			runOnlyForDeploymentPostprocessing = 0;
 76 | 		};
 77 | /* End PBXFrameworksBuildPhase section */
 78 | 
 79 | /* Begin PBXGroup section */
 80 | 		03495DB2187086B200D81680 /* tests */ = {
 81 | 			isa = PBXGroup;
 82 | 			children = (
 83 | 				03E6BF9718707F9B001C339E /* tests.c */,
 84 | 			);
 85 | 			name = tests;
 86 | 			path = unibinary;
 87 | 			sourceTree = "<group>";
 88 | 		};
 89 | 		03A3B0DA1869ED890070BD43 = {
 90 | 			isa = PBXGroup;
 91 | 			children = (
 92 | 				03E7F1C318A0E4D80075AF4A /* Makefile */,
 93 | 				03A3B0E51869ED890070BD43 /* unibinary */,
 94 | 				03495DB2187086B200D81680 /* tests */,
 95 | 				03A3B0E41869ED890070BD43 /* Products */,
 96 | 			);
 97 | 			sourceTree = "<group>";
 98 | 		};
 99 | 		03A3B0E41869ED890070BD43 /* Products */ = {
100 | 			isa = PBXGroup;
101 | 			children = (
102 | 				03A3B0E31869ED890070BD43 /* unibinary */,
103 | 				03495DAE1870810D00D81680 /* tests */,
104 | 			);
105 | 			name = Products;
106 | 			sourceTree = "<group>";
107 | 		};
108 | 		03A3B0E51869ED890070BD43 /* unibinary */ = {
109 | 			isa = PBXGroup;
110 | 			children = (
111 | 				03495DA018707FE100D81680 /* unibinary.h */,
112 | 				03A3B0E61869ED890070BD43 /* unibinary.c */,
113 | 				03495DA118707FF200D81680 /* main.c */,
114 | 				03A3B0E81869ED890070BD43 /* unibinary.1 */,
115 | 			);
116 | 			path = unibinary;
117 | 			sourceTree = "<group>";
118 | 		};
119 | /* End PBXGroup section */
120 | 
121 | /* Begin PBXNativeTarget section */
122 | 		03495DA31870810D00D81680 /* tests */ = {
123 | 			isa = PBXNativeTarget;
124 | 			buildConfigurationList = 03495DAB1870810D00D81680 /* Build configuration list for PBXNativeTarget "tests" */;
125 | 			buildPhases = (
126 | 				03495DA41870810D00D81680 /* Sources */,
127 | 				03495DA81870810D00D81680 /* Frameworks */,
128 | 				03495DA91870810D00D81680 /* CopyFiles */,
129 | 			);
130 | 			buildRules = (
131 | 			);
132 | 			dependencies = (
133 | 				03E7F1C518A0E5CA0075AF4A /* PBXTargetDependency */,
134 | 			);
135 | 			name = tests;
136 | 			productName = unibinary;
137 | 			productReference = 03495DAE1870810D00D81680 /* tests */;
138 | 			productType = "com.apple.product-type.tool";
139 | 		};
140 | 		03A3B0E21869ED890070BD43 /* unibinary */ = {
141 | 			isa = PBXNativeTarget;
142 | 			buildConfigurationList = 03A3B0EC1869ED890070BD43 /* Build configuration list for PBXNativeTarget "unibinary" */;
143 | 			buildPhases = (
144 | 				03A3B0DF1869ED890070BD43 /* Sources */,
145 | 				03A3B0E01869ED890070BD43 /* Frameworks */,
146 | 				03A3B0E11869ED890070BD43 /* CopyFiles */,
147 | 			);
148 | 			buildRules = (
149 | 			);
150 | 			dependencies = (
151 | 			);
152 | 			name = unibinary;
153 | 			productName = unibinary;
154 | 			productReference = 03A3B0E31869ED890070BD43 /* unibinary */;
155 | 			productType = "com.apple.product-type.tool";
156 | 		};
157 | /* End PBXNativeTarget section */
158 | 
159 | /* Begin PBXProject section */
160 | 		03A3B0DB1869ED890070BD43 /* Project object */ = {
161 | 			isa = PBXProject;
162 | 			attributes = {
163 | 				BuildIndependentTargetsInParallel = YES;
164 | 				LastUpgradeCheck = 1610;
165 | 				ORGANIZATIONNAME = "Nicolas Seriot";
166 | 			};
167 | 			buildConfigurationList = 03A3B0DE1869ED890070BD43 /* Build configuration list for PBXProject "unibinary" */;
168 | 			compatibilityVersion = "Xcode 3.2";
169 | 			developmentRegion = en;
170 | 			hasScannedForEncodings = 0;
171 | 			knownRegions = (
172 | 				en,
173 | 				Base,
174 | 			);
175 | 			mainGroup = 03A3B0DA1869ED890070BD43;
176 | 			productRefGroup = 03A3B0E41869ED890070BD43 /* Products */;
177 | 			projectDirPath = "";
178 | 			projectRoot = "";
179 | 			targets = (
180 | 				03A3B0E21869ED890070BD43 /* unibinary */,
181 | 				03495DA31870810D00D81680 /* tests */,
182 | 			);
183 | 		};
184 | /* End PBXProject section */
185 | 
186 | /* Begin PBXSourcesBuildPhase section */
187 | 		03495DA41870810D00D81680 /* Sources */ = {
188 | 			isa = PBXSourcesBuildPhase;
189 | 			buildActionMask = 2147483647;
190 | 			files = (
191 | 				03495DA51870810D00D81680 /* unibinary.c in Sources */,
192 | 				03495DA61870810D00D81680 /* tests.c in Sources */,
193 | 			);
194 | 			runOnlyForDeploymentPostprocessing = 0;
195 | 		};
196 | 		03A3B0DF1869ED890070BD43 /* Sources */ = {
197 | 			isa = PBXSourcesBuildPhase;
198 | 			buildActionMask = 2147483647;
199 | 			files = (
200 | 				03A3B0E71869ED890070BD43 /* unibinary.c in Sources */,
201 | 				03495DA218707FF200D81680 /* main.c in Sources */,
202 | 			);
203 | 			runOnlyForDeploymentPostprocessing = 0;
204 | 		};
205 | /* End PBXSourcesBuildPhase section */
206 | 
207 | /* Begin PBXTargetDependency section */
208 | 		03E7F1C518A0E5CA0075AF4A /* PBXTargetDependency */ = {
209 | 			isa = PBXTargetDependency;
210 | 			target = 03A3B0E21869ED890070BD43 /* unibinary */;
211 | 			targetProxy = 03E7F1C418A0E5CA0075AF4A /* PBXContainerItemProxy */;
212 | 		};
213 | /* End PBXTargetDependency section */
214 | 
215 | /* Begin XCBuildConfiguration section */
216 | 		03495DAC1870810D00D81680 /* Debug */ = {
217 | 			isa = XCBuildConfiguration;
218 | 			buildSettings = {
219 | 				DEAD_CODE_STRIPPING = YES;
220 | 				MACOSX_DEPLOYMENT_TARGET = 11.5;
221 | 				PRODUCT_NAME = tests;
222 | 			};
223 | 			name = Debug;
224 | 		};
225 | 		03495DAD1870810D00D81680 /* Release */ = {
226 | 			isa = XCBuildConfiguration;
227 | 			buildSettings = {
228 | 				DEAD_CODE_STRIPPING = YES;
229 | 				MACOSX_DEPLOYMENT_TARGET = 11.5;
230 | 				PRODUCT_NAME = tests;
231 | 			};
232 | 			name = Release;
233 | 		};
234 | 		03A3B0EA1869ED890070BD43 /* Debug */ = {
235 | 			isa = XCBuildConfiguration;
236 | 			buildSettings = {
237 | 				ALWAYS_SEARCH_USER_PATHS = NO;
238 | 				CLANG_ANALYZER_LOCALIZABILITY_NONLOCALIZED = YES;
239 | 				CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x";
240 | 				CLANG_CXX_LIBRARY = "libc++";
241 | 				CLANG_ENABLE_OBJC_ARC = YES;
242 | 				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
243 | 				CLANG_WARN_BOOL_CONVERSION = YES;
244 | 				CLANG_WARN_COMMA = YES;
245 | 				CLANG_WARN_CONSTANT_CONVERSION = YES;
246 | 				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
247 | 				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
248 | 				CLANG_WARN_EMPTY_BODY = YES;
249 | 				CLANG_WARN_ENUM_CONVERSION = YES;
250 | 				CLANG_WARN_INFINITE_RECURSION = YES;
251 | 				CLANG_WARN_INT_CONVERSION = YES;
252 | 				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
253 | 				CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
254 | 				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
255 | 				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
256 | 				CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
257 | 				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
258 | 				CLANG_WARN_STRICT_PROTOTYPES = YES;
259 | 				CLANG_WARN_SUSPICIOUS_MOVE = YES;
260 | 				CLANG_WARN_UNREACHABLE_CODE = YES;
261 | 				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
262 | 				COPY_PHASE_STRIP = NO;
263 | 				DEAD_CODE_STRIPPING = YES;
264 | 				ENABLE_STRICT_OBJC_MSGSEND = YES;
265 | 				ENABLE_TESTABILITY = YES;
266 | 				ENABLE_USER_SCRIPT_SANDBOXING = YES;
267 | 				GCC_C_LANGUAGE_STANDARD = gnu99;
268 | 				GCC_DYNAMIC_NO_PIC = NO;
269 | 				GCC_ENABLE_OBJC_EXCEPTIONS = YES;
270 | 				GCC_NO_COMMON_BLOCKS = YES;
271 | 				GCC_OPTIMIZATION_LEVEL = 0;
272 | 				GCC_PREPROCESSOR_DEFINITIONS = (
273 | 					"DEBUG=1",
274 | 					"$(inherited)",
275 | 				);
276 | 				GCC_SYMBOLS_PRIVATE_EXTERN = NO;
277 | 				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
278 | 				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
279 | 				GCC_WARN_UNDECLARED_SELECTOR = YES;
280 | 				GCC_WARN_UNINITIALIZED_AUTOS = YES;
281 | 				GCC_WARN_UNUSED_FUNCTION = YES;
282 | 				GCC_WARN_UNUSED_VARIABLE = YES;
283 | 				ONLY_ACTIVE_ARCH = YES;
284 | 				OTHER_CFLAGS = "-Wall";
285 | 				SDKROOT = macosx;
286 | 			};
287 | 			name = Debug;
288 | 		};
289 | 		03A3B0EB1869ED890070BD43 /* Release */ = {
290 | 			isa = XCBuildConfiguration;
291 | 			buildSettings = {
292 | 				ALWAYS_SEARCH_USER_PATHS = NO;
293 | 				CLANG_ANALYZER_LOCALIZABILITY_NONLOCALIZED = YES;
294 | 				CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x";
295 | 				CLANG_CXX_LIBRARY = "libc++";
296 | 				CLANG_ENABLE_OBJC_ARC = YES;
297 | 				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
298 | 				CLANG_WARN_BOOL_CONVERSION = YES;
299 | 				CLANG_WARN_COMMA = YES;
300 | 				CLANG_WARN_CONSTANT_CONVERSION = YES;
301 | 				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
302 | 				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
303 | 				CLANG_WARN_EMPTY_BODY = YES;
304 | 				CLANG_WARN_ENUM_CONVERSION = YES;
305 | 				CLANG_WARN_INFINITE_RECURSION = YES;
306 | 				CLANG_WARN_INT_CONVERSION = YES;
307 | 				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
308 | 				CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
309 | 				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
310 | 				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
311 | 				CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
312 | 				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
313 | 				CLANG_WARN_STRICT_PROTOTYPES = YES;
314 | 				CLANG_WARN_SUSPICIOUS_MOVE = YES;
315 | 				CLANG_WARN_UNREACHABLE_CODE = YES;
316 | 				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
317 | 				COPY_PHASE_STRIP = YES;
318 | 				DEAD_CODE_STRIPPING = YES;
319 | 				DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
320 | 				ENABLE_NS_ASSERTIONS = NO;
321 | 				ENABLE_STRICT_OBJC_MSGSEND = YES;
322 | 				ENABLE_USER_SCRIPT_SANDBOXING = YES;
323 | 				GCC_C_LANGUAGE_STANDARD = gnu99;
324 | 				GCC_ENABLE_OBJC_EXCEPTIONS = YES;
325 | 				GCC_NO_COMMON_BLOCKS = YES;
326 | 				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
327 | 				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
328 | 				GCC_WARN_UNDECLARED_SELECTOR = YES;
329 | 				GCC_WARN_UNINITIALIZED_AUTOS = YES;
330 | 				GCC_WARN_UNUSED_FUNCTION = YES;
331 | 				GCC_WARN_UNUSED_VARIABLE = YES;
332 | 				OTHER_CFLAGS = "-Wall";
333 | 				SDKROOT = macosx;
334 | 			};
335 | 			name = Release;
336 | 		};
337 | 		03A3B0ED1869ED890070BD43 /* Debug */ = {
338 | 			isa = XCBuildConfiguration;
339 | 			buildSettings = {
340 | 				DEAD_CODE_STRIPPING = YES;
341 | 				MACOSX_DEPLOYMENT_TARGET = 11.0;
342 | 				PRODUCT_NAME = "$(TARGET_NAME)";
343 | 			};
344 | 			name = Debug;
345 | 		};
346 | 		03A3B0EE1869ED890070BD43 /* Release */ = {
347 | 			isa = XCBuildConfiguration;
348 | 			buildSettings = {
349 | 				DEAD_CODE_STRIPPING = YES;
350 | 				MACOSX_DEPLOYMENT_TARGET = 11.0;
351 | 				PRODUCT_NAME = "$(TARGET_NAME)";
352 | 			};
353 | 			name = Release;
354 | 		};
355 | /* End XCBuildConfiguration section */
356 | 
357 | /* Begin XCConfigurationList section */
358 | 		03495DAB1870810D00D81680 /* Build configuration list for PBXNativeTarget "tests" */ = {
359 | 			isa = XCConfigurationList;
360 | 			buildConfigurations = (
361 | 				03495DAC1870810D00D81680 /* Debug */,
362 | 				03495DAD1870810D00D81680 /* Release */,
363 | 			);
364 | 			defaultConfigurationIsVisible = 0;
365 | 			defaultConfigurationName = Release;
366 | 		};
367 | 		03A3B0DE1869ED890070BD43 /* Build configuration list for PBXProject "unibinary" */ = {
368 | 			isa = XCConfigurationList;
369 | 			buildConfigurations = (
370 | 				03A3B0EA1869ED890070BD43 /* Debug */,
371 | 				03A3B0EB1869ED890070BD43 /* Release */,
372 | 			);
373 | 			defaultConfigurationIsVisible = 0;
374 | 			defaultConfigurationName = Release;
375 | 		};
376 | 		03A3B0EC1869ED890070BD43 /* Build configuration list for PBXNativeTarget "unibinary" */ = {
377 | 			isa = XCConfigurationList;
378 | 			buildConfigurations = (
379 | 				03A3B0ED1869ED890070BD43 /* Debug */,
380 | 				03A3B0EE1869ED890070BD43 /* Release */,
381 | 			);
382 | 			defaultConfigurationIsVisible = 0;
383 | 			defaultConfigurationName = Release;
384 | 		};
385 | /* End XCConfigurationList section */
386 | 	};
387 | 	rootObject = 03A3B0DB1869ED890070BD43 /* Project object */;
388 | }
389 | 


--------------------------------------------------------------------------------
/c/unibinary.xcodeproj/project.xcworkspace/contents.xcworkspacedata:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <Workspace
3 |    version = "1.0">
4 |    <FileRef
5 |       location = "self:unibinary.xcodeproj">
6 |    </FileRef>
7 | </Workspace>
8 | 


--------------------------------------------------------------------------------
/c/unibinary.xcodeproj/project.xcworkspace/xcshareddata/unibinary.xccheckout:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
 3 | <plist version="1.0">
 4 | <dict>
 5 | 	<key>IDESourceControlProjectFavoriteDictionaryKey</key>
 6 | 	<false/>
 7 | 	<key>IDESourceControlProjectIdentifier</key>
 8 | 	<string>3F27F61D-A4BB-473B-AD19-AAB4617EA8C6</string>
 9 | 	<key>IDESourceControlProjectName</key>
10 | 	<string>unibinary</string>
11 | 	<key>IDESourceControlProjectOriginsDictionary</key>
12 | 	<dict>
13 | 		<key>88D749B0-2DE6-49D3-B7FD-DE2AD7DF2BAC</key>
14 | 		<string>https://github.com/nst/UniBinary.git</string>
15 | 	</dict>
16 | 	<key>IDESourceControlProjectPath</key>
17 | 	<string>c/unibinary.xcodeproj/project.xcworkspace</string>
18 | 	<key>IDESourceControlProjectRelativeInstallPathDictionary</key>
19 | 	<dict>
20 | 		<key>88D749B0-2DE6-49D3-B7FD-DE2AD7DF2BAC</key>
21 | 		<string>../../..</string>
22 | 	</dict>
23 | 	<key>IDESourceControlProjectURL</key>
24 | 	<string>https://github.com/nst/UniBinary.git</string>
25 | 	<key>IDESourceControlProjectVersion</key>
26 | 	<integer>110</integer>
27 | 	<key>IDESourceControlProjectWCCIdentifier</key>
28 | 	<string>88D749B0-2DE6-49D3-B7FD-DE2AD7DF2BAC</string>
29 | 	<key>IDESourceControlProjectWCConfigurations</key>
30 | 	<array>
31 | 		<dict>
32 | 			<key>IDESourceControlRepositoryExtensionIdentifierKey</key>
33 | 			<string>public.vcs.git</string>
34 | 			<key>IDESourceControlWCCIdentifierKey</key>
35 | 			<string>88D749B0-2DE6-49D3-B7FD-DE2AD7DF2BAC</string>
36 | 			<key>IDESourceControlWCCName</key>
37 | 			<string>UniBinary</string>
38 | 		</dict>
39 | 	</array>
40 | </dict>
41 | </plist>
42 | 


--------------------------------------------------------------------------------
/c/unibinary.xcodeproj/project.xcworkspace/xcuserdata/nst.xcuserdatad/UserInterfaceState.xcuserstate:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nst/UniBinary/f6a9929f90616540060d329ef7114c1b6edb3a76/c/unibinary.xcodeproj/project.xcworkspace/xcuserdata/nst.xcuserdatad/UserInterfaceState.xcuserstate


--------------------------------------------------------------------------------
/c/unibinary.xcodeproj/project.xcworkspace/xcuserdata/nst.xcuserdatad/WorkspaceSettings.xcsettings:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
 3 | <plist version="1.0">
 4 | <dict>
 5 | 	<key>HasAskedToTakeAutomaticSnapshotBeforeSignificantChanges</key>
 6 | 	<true/>
 7 | 	<key>SnapshotAutomaticallyBeforeSignificantChanges</key>
 8 | 	<false/>
 9 | </dict>
10 | </plist>
11 | 


--------------------------------------------------------------------------------
/c/unibinary.xcodeproj/xcuserdata/nst.xcuserdatad/xcdebugger/Breakpoints_v2.xcbkptlist:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <Bucket
3 |    type = "1"
4 |    version = "2.0">
5 | </Bucket>
6 | 


--------------------------------------------------------------------------------
/c/unibinary.xcodeproj/xcuserdata/nst.xcuserdatad/xcschemes/tests.xcscheme:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <Scheme
 3 |    LastUpgradeVersion = "0500"
 4 |    version = "1.3">
 5 |    <BuildAction
 6 |       parallelizeBuildables = "YES"
 7 |       buildImplicitDependencies = "YES">
 8 |       <BuildActionEntries>
 9 |          <BuildActionEntry
10 |             buildForTesting = "YES"
11 |             buildForRunning = "YES"
12 |             buildForProfiling = "YES"
13 |             buildForArchiving = "YES"
14 |             buildForAnalyzing = "YES">
15 |             <BuildableReference
16 |                BuildableIdentifier = "primary"
17 |                BlueprintIdentifier = "03495DA31870810D00D81680"
18 |                BuildableName = "tests"
19 |                BlueprintName = "tests"
20 |                ReferencedContainer = "container:unibinary.xcodeproj">
21 |             </BuildableReference>
22 |          </BuildActionEntry>
23 |       </BuildActionEntries>
24 |    </BuildAction>
25 |    <TestAction
26 |       selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
27 |       selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
28 |       shouldUseLaunchSchemeArgsEnv = "YES"
29 |       buildConfiguration = "Debug">
30 |       <Testables>
31 |       </Testables>
32 |       <MacroExpansion>
33 |          <BuildableReference
34 |             BuildableIdentifier = "primary"
35 |             BlueprintIdentifier = "03495DA31870810D00D81680"
36 |             BuildableName = "tests"
37 |             BlueprintName = "tests"
38 |             ReferencedContainer = "container:unibinary.xcodeproj">
39 |          </BuildableReference>
40 |       </MacroExpansion>
41 |    </TestAction>
42 |    <LaunchAction
43 |       selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
44 |       selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
45 |       launchStyle = "0"
46 |       useCustomWorkingDirectory = "NO"
47 |       buildConfiguration = "Debug"
48 |       ignoresPersistentStateOnLaunch = "NO"
49 |       debugDocumentVersioning = "YES"
50 |       allowLocationSimulation = "YES">
51 |       <BuildableProductRunnable>
52 |          <BuildableReference
53 |             BuildableIdentifier = "primary"
54 |             BlueprintIdentifier = "03495DA31870810D00D81680"
55 |             BuildableName = "tests"
56 |             BlueprintName = "tests"
57 |             ReferencedContainer = "container:unibinary.xcodeproj">
58 |          </BuildableReference>
59 |       </BuildableProductRunnable>
60 |       <EnvironmentVariables>
61 |          <EnvironmentVariable
62 |             key = "LC_ALL"
63 |             value = "en_US.utf8"
64 |             isEnabled = "YES">
65 |          </EnvironmentVariable>
66 |       </EnvironmentVariables>
67 |       <AdditionalOptions>
68 |       </AdditionalOptions>
69 |    </LaunchAction>
70 |    <ProfileAction
71 |       shouldUseLaunchSchemeArgsEnv = "YES"
72 |       savedToolIdentifier = ""
73 |       useCustomWorkingDirectory = "NO"
74 |       buildConfiguration = "Release"
75 |       debugDocumentVersioning = "YES">
76 |       <BuildableProductRunnable>
77 |          <BuildableReference
78 |             BuildableIdentifier = "primary"
79 |             BlueprintIdentifier = "03495DA31870810D00D81680"
80 |             BuildableName = "tests"
81 |             BlueprintName = "tests"
82 |             ReferencedContainer = "container:unibinary.xcodeproj">
83 |          </BuildableReference>
84 |       </BuildableProductRunnable>
85 |    </ProfileAction>
86 |    <AnalyzeAction
87 |       buildConfiguration = "Debug">
88 |    </AnalyzeAction>
89 |    <ArchiveAction
90 |       buildConfiguration = "Release"
91 |       revealArchiveInOrganizer = "YES">
92 |    </ArchiveAction>
93 | </Scheme>
94 | 


--------------------------------------------------------------------------------
/c/unibinary.xcodeproj/xcuserdata/nst.xcuserdatad/xcschemes/unibinary.xcscheme:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <Scheme
 3 |    LastUpgradeVersion = "0500"
 4 |    version = "1.3">
 5 |    <BuildAction
 6 |       parallelizeBuildables = "YES"
 7 |       buildImplicitDependencies = "YES">
 8 |       <BuildActionEntries>
 9 |          <BuildActionEntry
10 |             buildForTesting = "YES"
11 |             buildForRunning = "YES"
12 |             buildForProfiling = "YES"
13 |             buildForArchiving = "YES"
14 |             buildForAnalyzing = "YES">
15 |             <BuildableReference
16 |                BuildableIdentifier = "primary"
17 |                BlueprintIdentifier = "03A3B0E21869ED890070BD43"
18 |                BuildableName = "unibinary"
19 |                BlueprintName = "unibinary"
20 |                ReferencedContainer = "container:unibinary.xcodeproj">
21 |             </BuildableReference>
22 |          </BuildActionEntry>
23 |       </BuildActionEntries>
24 |    </BuildAction>
25 |    <TestAction
26 |       selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
27 |       selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
28 |       shouldUseLaunchSchemeArgsEnv = "YES"
29 |       buildConfiguration = "Debug">
30 |       <Testables>
31 |       </Testables>
32 |       <MacroExpansion>
33 |          <BuildableReference
34 |             BuildableIdentifier = "primary"
35 |             BlueprintIdentifier = "03A3B0E21869ED890070BD43"
36 |             BuildableName = "unibinary"
37 |             BlueprintName = "unibinary"
38 |             ReferencedContainer = "container:unibinary.xcodeproj">
39 |          </BuildableReference>
40 |       </MacroExpansion>
41 |    </TestAction>
42 |    <LaunchAction
43 |       selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
44 |       selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
45 |       launchStyle = "0"
46 |       useCustomWorkingDirectory = "NO"
47 |       buildConfiguration = "Debug"
48 |       ignoresPersistentStateOnLaunch = "NO"
49 |       debugDocumentVersioning = "YES"
50 |       allowLocationSimulation = "YES">
51 |       <BuildableProductRunnable>
52 |          <BuildableReference
53 |             BuildableIdentifier = "primary"
54 |             BlueprintIdentifier = "03A3B0E21869ED890070BD43"
55 |             BuildableName = "unibinary"
56 |             BlueprintName = "unibinary"
57 |             ReferencedContainer = "container:unibinary.xcodeproj">
58 |          </BuildableReference>
59 |       </BuildableProductRunnable>
60 |       <EnvironmentVariables>
61 |          <EnvironmentVariable
62 |             key = "LC_ALL"
63 |             value = "en_US.utf8"
64 |             isEnabled = "YES">
65 |          </EnvironmentVariable>
66 |       </EnvironmentVariables>
67 |       <AdditionalOptions>
68 |       </AdditionalOptions>
69 |    </LaunchAction>
70 |    <ProfileAction
71 |       shouldUseLaunchSchemeArgsEnv = "YES"
72 |       savedToolIdentifier = ""
73 |       useCustomWorkingDirectory = "NO"
74 |       buildConfiguration = "Release"
75 |       debugDocumentVersioning = "YES">
76 |       <BuildableProductRunnable>
77 |          <BuildableReference
78 |             BuildableIdentifier = "primary"
79 |             BlueprintIdentifier = "03A3B0E21869ED890070BD43"
80 |             BuildableName = "unibinary"
81 |             BlueprintName = "unibinary"
82 |             ReferencedContainer = "container:unibinary.xcodeproj">
83 |          </BuildableReference>
84 |       </BuildableProductRunnable>
85 |    </ProfileAction>
86 |    <AnalyzeAction
87 |       buildConfiguration = "Debug">
88 |    </AnalyzeAction>
89 |    <ArchiveAction
90 |       buildConfiguration = "Release"
91 |       revealArchiveInOrganizer = "YES">
92 |    </ArchiveAction>
93 | </Scheme>
94 | 


--------------------------------------------------------------------------------
/c/unibinary.xcodeproj/xcuserdata/nst.xcuserdatad/xcschemes/xcschememanagement.plist:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
 3 | <plist version="1.0">
 4 | <dict>
 5 | 	<key>SchemeUserState</key>
 6 | 	<dict>
 7 | 		<key>tests.xcscheme</key>
 8 | 		<dict>
 9 | 			<key>orderHint</key>
10 | 			<integer>1</integer>
11 | 		</dict>
12 | 		<key>unibinary.xcscheme</key>
13 | 		<dict>
14 | 			<key>orderHint</key>
15 | 			<integer>0</integer>
16 | 		</dict>
17 | 	</dict>
18 | 	<key>SuppressBuildableAutocreation</key>
19 | 	<dict>
20 | 		<key>03495DA31870810D00D81680</key>
21 | 		<dict>
22 | 			<key>primary</key>
23 | 			<true/>
24 | 		</dict>
25 | 		<key>03A3B0E21869ED890070BD43</key>
26 | 		<dict>
27 | 			<key>primary</key>
28 | 			<true/>
29 | 		</dict>
30 | 	</dict>
31 | </dict>
32 | </plist>
33 | 


--------------------------------------------------------------------------------
/c/unibinary/Makefile:
--------------------------------------------------------------------------------
 1 | CC=gcc
 2 | CFLAGS=-I. -Wall
 3 | 
 4 | unibinary: unibinary.o main.o
 5 | 	$(CC) -o unibinary main.o unibinary.o $(CFLAGS)
 6 | 
 7 | tests: unibinary.o tests.o
 8 | 	$(CC) -o tests unibinary.o tests.o $(CFLAGS)
 9 | 
10 | clean:
11 | 	rm -rf *o unibinary tests
12 | 


--------------------------------------------------------------------------------
/c/unibinary/main.c:
--------------------------------------------------------------------------------
  1 | //
  2 | //  main.c
  3 | //  unibinary
  4 | //
  5 | //  Created by Nicolas Seriot on 29/12/13.
  6 | //  Copyright (c) 2013 Nicolas Seriot. All rights reserved.
  7 | //
  8 | 
  9 | #include "unibinary.h"
 10 | #include <locale.h>
 11 | #include <stdlib.h>
 12 | #include <string.h>
 13 | #include <getopt.h>
 14 | #include <unistd.h>
 15 | 
 16 | int display_usage(void) {
 17 |     printf("Usage: unibinary [-ed] [-sf] [-b num] [-h]\n");
 18 |     printf("\n");
 19 |     printf("UniBinary encodes and decodes data into printable Unicode characters.\n");
 20 |     printf("\n");
 21 |     printf("  -e, --encode\n");
 22 |     printf("  -d, --decode\n");
 23 |     printf("  -s, --string    to be encoded or decoded\n");
 24 |     printf("  -f, --filepath  to be encoded or decoded\n");
 25 |     printf("  -b, --break     break encoded string into num characters lines\n");
 26 |     printf("  -h, --help      show this help message and exit\n");
 27 |     return EXIT_SUCCESS;
 28 | }
 29 | 
 30 | static const struct option long_options[] =
 31 | {
 32 |     { "encode", no_argument, 0, 'e' },
 33 |     { "decode", no_argument, 0, 'd' },
 34 |     { "string", required_argument, 0, 's' },
 35 |     { "path", required_argument, 0, 'f' },
 36 |     { "break", required_argument, 0, 'b' },
 37 |     { "help", no_argument, 0, 'h' },
 38 |     { NULL, 0, NULL, 0 }
 39 | };
 40 | 
 41 | struct global_args_t {
 42 |     short encode;
 43 |     short decode;
 44 |     char *string;
 45 |     const char *path;
 46 |     short wrap;
 47 | } global_args;
 48 | 
 49 | int main(int argc, char * const argv[]) {
 50 | 
 51 |     //    $ echo test | ./unibinary -e | ./unibinary -d
 52 |     //    test
 53 | 
 54 |     char *old_locale = setlocale(LC_ALL, NULL);
 55 |     char *saved_locale = strdup(old_locale);
 56 |     if(saved_locale == NULL) return EXIT_FAILURE;
 57 |     
 58 |     setlocale(LC_CTYPE, "");
 59 |     
 60 |     static const char *opt_string = "eds:f:b:h";
 61 | 
 62 |     int opt = getopt_long( argc, argv, opt_string, long_options, NULL);
 63 |     while( opt != -1 ) {
 64 |         switch( opt ) {
 65 |             case 'e':
 66 |                 global_args.encode = 1;
 67 |                 break;
 68 |             case 'd':
 69 |                 global_args.decode = 1;
 70 |                 break;
 71 |             case 's':
 72 |                 global_args.string = optarg;
 73 |                 break;
 74 |             case 'f':
 75 |                 global_args.path = optarg;
 76 |                 break;
 77 |             case 'b':
 78 |                 global_args.wrap = atoi(optarg);
 79 |                 break;
 80 | //            case 'h':
 81 | //                display_usage();
 82 | //                goto exit_failure;
 83 |                 break;
 84 |             default:
 85 |                 break;
 86 |         }
 87 |         
 88 |         opt = getopt_long( argc, argv, opt_string, long_options, NULL);
 89 |     }
 90 |     
 91 |     if(global_args.encode) {
 92 |         // encode
 93 |         
 94 |         if(global_args.string != NULL) {
 95 |             // encode string
 96 |             wchar_t *wcs;
 97 |             unibinary_encode_string(global_args.string, &wcs, global_args.wrap);
 98 |             fwprintf(stdout, wcs);
 99 |             free(wcs);
100 |         } else if (global_args.path != NULL) {
101 |             // encode path
102 |             FILE *fd_in = fopen(global_args.path, "rb");
103 |             if(fd_in == NULL) goto exit_failure;
104 |             
105 |             int status = unibinary_encode(fd_in, stdout, global_args.wrap);
106 |             fclose(fd_in);
107 |             
108 |             if(status != 0) goto exit_failure;
109 |         } else {
110 |             // encode stdin
111 |             int status = unibinary_encode(stdin, stdout, global_args.wrap);
112 |             if(status != 0) goto exit_failure;
113 |         }
114 |     } else if (global_args.decode) {
115 |         // decode
116 |         
117 |         if(global_args.string != NULL) {
118 |             // decode string
119 |             size_t max_wchar_bytes = strlen(global_args.string) * MB_CUR_MAX;
120 |             wchar_t wcsout[max_wchar_bytes];
121 |             size_t nb_wc = mbstowcs(wcsout, global_args.string, max_wchar_bytes);
122 |             if(nb_wc == -1) goto exit_failure;
123 |             
124 |             char* data;
125 |             long dst_len;
126 |             unibinary_decode_string(wcsout, &data, &dst_len);
127 |             size_t written = fwrite(data, sizeof(char), dst_len, stdout);
128 |             free(data);
129 |             
130 |             if(written != dst_len) goto exit_failure;
131 |         
132 |         } else if (global_args.path != NULL) {
133 |             // decode path
134 |             FILE *fd_in = fopen(global_args.path, "rb");
135 |             if(fd_in == NULL) goto exit_failure;
136 |             
137 |             int status = unibinary_decode(fd_in, stdout);
138 |             fclose(fd_in);
139 |             
140 |             if(status != 0) goto exit_failure;
141 |         } else {
142 |             // decode stdin
143 |             int status = unibinary_decode(stdin, stdout);
144 |             if(status != 0) goto exit_failure;
145 | 
146 |         }
147 |         
148 |     } else {
149 |         display_usage();
150 |     }
151 |     
152 |     if(global_args.encode || global_args.decode) {
153 |         // add a newline if stdout is not piped
154 |         if (isatty(fileno(stdout)) == 1) {
155 |             fflush(stdout);
156 |             fprintf(stderr, "\n");
157 |         }
158 |     }
159 |     
160 |     goto exit_success;
161 | 
162 | exit_success:
163 |     setlocale(LC_ALL, saved_locale);
164 |     free(saved_locale);
165 |     
166 |     return EXIT_SUCCESS;
167 | 
168 | exit_failure:
169 |     setlocale(LC_ALL, saved_locale);
170 |     free(saved_locale);
171 |     
172 |     return EXIT_FAILURE;
173 | }
174 | 


--------------------------------------------------------------------------------
/c/unibinary/tests.c:
--------------------------------------------------------------------------------
  1 | //
  2 | //  tests.c
  3 | //  unibinary
  4 | //
  5 | //  Created by Nicolas Seriot on 29/12/13.
  6 | //  Copyright (c) 2013 Nicolas Seriot. All rights reserved.
  7 | //
  8 | 
  9 | #include "unibinary.h"
 10 | 
 11 | #include <stdlib.h>
 12 | #include <string.h>
 13 | #include <assert.h>
 14 | #include <locale.h>
 15 | 
 16 | int number_of_repeated_characters_at_index(const char* src, size_t i, size_t srcSize, int *n);
 17 | int unichr_12a_from_two_ascii(unsigned char c0, unsigned char c1, wchar_t *u0);
 18 | int two_twelve_bits_values_from_three_bytes(uint8_t c0, uint8_t c1, uint8_t c2, wchar_t *u0, wchar_t *u1);
 19 | int two_unichr_to_repeat_byte_ntimes(unsigned char c, int n, wchar_t *u0, wchar_t *u1);
 20 | int to_U08(uint8_t i, wchar_t *o);
 21 | int to_U12(wchar_t i, wchar_t *o);
 22 | int from_U12b(wchar_t i, wchar_t *o);
 23 | int is_in_U08b(wchar_t i);
 24 | int is_in_U12a(wchar_t i);
 25 | int bytes_from_u1_u2(wchar_t u1, wchar_t u2, uint8_t **buffer, size_t *bufferSize);
 26 | int U12a_to_8_8(wchar_t u, uint8_t *b0, uint8_t *b1);
 27 | int two_bytes_from_unichars(wchar_t u1, wchar_t u2, uint8_t *b1, uint8_t *b2);
 28 | int three_bytes_from_unichars(wchar_t u1, wchar_t u2, uint8_t *b1, uint8_t *b2, uint8_t *b3);
 29 | 
 30 | int compareFiles(const char* filename1, const char* filename2) {
 31 |     // 0 if same contents
 32 |     
 33 |     FILE *f1 = fopen(filename1, "r");
 34 |     FILE *f2 = fopen(filename2, "r");
 35 |     
 36 |     // obtain file size:
 37 |     fseek (f1 , 0 , SEEK_END);
 38 |     long size1 = ftell (f1);
 39 |     rewind (f1);
 40 |     
 41 |     // obtain file size:
 42 |     fseek (f2 , 0 , SEEK_END);
 43 |     long size2 = ftell (f2);
 44 |     rewind (f2);
 45 |     
 46 |     if (size1 != size2) {
 47 |         printf("File sizes differ, %ld vs. %ld\n", size1, size2);
 48 |         fclose(f1);
 49 |         fclose(f2);
 50 |         return EXIT_FAILURE;
 51 |     }
 52 |     
 53 |     char tmp1, tmp2;
 54 |     
 55 |     int files_are_equal = 1;
 56 |     
 57 |     for (int i=0;i<size2;i++) {
 58 |         fread(&tmp1, 1, 1, f1);
 59 |         fread(&tmp2, 1, 1, f2);
 60 |         if (tmp1 != tmp2) {
 61 |             printf("%x: tmp1 0x%x != tmp2 0x%x\n",i , tmp1, tmp2);
 62 |             files_are_equal = 0;
 63 |             break;
 64 |         }
 65 |     }
 66 | 
 67 |     fclose(f1);
 68 |     fclose(f2);
 69 | 
 70 |     return files_are_equal ? 0 : -1;
 71 | }
 72 | 
 73 | void testRepeats(void) {
 74 |     
 75 |     printf("== %s ==\n", __func__);
 76 |     
 77 |     {
 78 |         const char *s = "aaaba";
 79 |         
 80 |         int n = 0;
 81 |         int status = number_of_repeated_characters_at_index(s, 0, strlen(s), &n);
 82 |         
 83 |         assert(status == EXIT_SUCCESS);
 84 |         assert(n == 3);
 85 |     }
 86 |     
 87 |     {
 88 |         const char *s = "";
 89 |         
 90 |         int n = 0;
 91 |         int status = number_of_repeated_characters_at_index(s, 0, strlen(s), &n);
 92 |         
 93 |         assert(status == EXIT_SUCCESS);
 94 |         assert(n == 0);
 95 |     }
 96 |     
 97 |     {
 98 |         const char *s = "asdfg";
 99 |         
100 |         int n = 0;
101 |         int status = number_of_repeated_characters_at_index(s, 0, strlen(s), &n);
102 |         
103 |         assert(status == EXIT_SUCCESS);
104 |         assert(n == 1);
105 |     }
106 | }
107 | 
// Checks the wide character produced by unichr_12a_from_two_ascii()
// for two pairs of ASCII characters.
void test_7_7_to_12(void) {
    
    printf("== %s ==\n", __func__);
    
    static const struct {
        unsigned char first;
        unsigned char second;
        wchar_t expected;
    } pairs[] = {
        { 'Z', 'E', 0x9485 },
        { 'z', ',', 0x8CAC },
    };
    
    for (size_t k = 0; k < sizeof(pairs) / sizeof(pairs[0]); k++) {
        wchar_t packed;
        int rc = unichr_12a_from_two_ascii(pairs[k].first, pairs[k].second, &packed);
        assert(rc == EXIT_SUCCESS);
        assert(packed == pairs[k].expected);
    }
}
126 | 
// Checks that two_twelve_bits_values_from_three_bytes() turns the bytes
// 0xAB 0xCD 0xEF into the two values 0xABC and 0xDEF.
void test_8_8_8_to_12_12(void) {
    
    printf("== %s ==\n", __func__);
    
    const uint8_t bytes[3] = { 0xAB, 0xCD, 0xEF };
    
    wchar_t first;
    wchar_t second = 0;
    
    int rc = two_twelve_bits_values_from_three_bytes(bytes[0], bytes[1], bytes[2], &first, &second);
    assert(rc == EXIT_SUCCESS);
    
    assert(first == 0xABC);
    assert(second == 0xDEF);
}
144 | 
// Encodes the ASCII string "abc" with unibinary_encode() and checks the two
// wide characters read back from the output file.
void test_ascii_encoding(void) {
    
    printf("== %s ==\n", __func__);
    
    // write the source file
    FILE *fd_in = fopen("/tmp/test_ascii_encoding_src", "wb+");
    assert(fd_in != NULL);
    const char *s = "abc";
    size_t written = fwrite(s, 1, strlen(s), fd_in);
    assert(written == strlen(s));
    fclose(fd_in);
    
    /**/
    
    // encode it
    FILE *fd_out = fopen("/tmp/test_ascii_encoding", "wb+");
    assert(fd_out != NULL);
    
    FILE *fd_in2 = fopen("/tmp/test_ascii_encoding_src", "rb");
    assert(fd_in2 != NULL);
    
    int status = unibinary_encode(fd_in2, fd_out, 0);
    assert(status == EXIT_SUCCESS);
    
    fclose(fd_in2);
    fclose(fd_out);
    
    /**/
    
    // read the encoded output back
    FILE *fd_in3 = fopen("/tmp/test_ascii_encoding", "rb");
    assert(fd_in3 != NULL);
    
    wchar_t ctrl0 = fgetwc(fd_in3);
    wchar_t ctrl1 = fgetwc(fd_in3);
    
    assert(ctrl0 == 0x9662);
    assert(ctrl1 == 0x0463);
    
    fclose(fd_in3);
}
179 | 
// Encodes the three bytes 0xAB 0xCD 0xEF with unibinary_encode() and checks
// that the output file holds exactly to_U12(0xABC) followed by to_U12(0xDEF).
void test_encode_3_bytes(void) {
    
    printf("== %s ==\n", __func__);
    
    FILE *fd_in = fopen("/tmp/test_encode_3_bytes_src", "wb+");
    assert(fd_in != NULL);
    const char *s = "\xAB\xCD\xEF";
    fwrite(s, 1, strlen(s), fd_in);
    fclose(fd_in);
    
    FILE *fd_out = fopen("/tmp/test_encode_3_bytes", "wb+");
    assert(fd_out != NULL);
    
    FILE *fd_in2 = fopen("/tmp/test_encode_3_bytes_src", "rb");
    assert(fd_in2 != NULL);
    
    int status = unibinary_encode(fd_in2, fd_out, 0);
    assert(status == EXIT_SUCCESS);
    
    fclose(fd_in2);
    fclose(fd_out);
    
    /**/
    
    FILE *fd_in3 = fopen("/tmp/test_encode_3_bytes", "rb");
    assert(fd_in3 != NULL);
    
    wchar_t ctrl0 = fgetwc(fd_in3);
    wchar_t ctrl1 = fgetwc(fd_in3);
    
    // fgetwc() signals end of file with WEOF, not EOF
    wint_t trailing = fgetwc(fd_in3);
    assert(trailing == WEOF);
    
    // The conversions run outside assert() so they still happen when
    // assertions are compiled out with NDEBUG.
    wchar_t file0 = 0, file1 = 0;
    
    int rc0 = to_U12(0xABC, &file0);
    int rc1 = to_U12(0xDEF, &file1);
    assert(rc0 == 0);
    assert(rc1 == 0);
    
    assert(ctrl0 == file0);
    assert(ctrl1 == file1);
    
    fclose(fd_in3);
}
219 | 
// Encodes the five bytes 0xAB 0xCD 0xEF 0xAB 0xCD with unibinary_encode() and
// checks that the output file holds exactly to_U12(0xABC), to_U12(0xDEF),
// to_U08(0xAB) and to_U08(0xCD).
void test_encode_5_bytes(void) {
    
    printf("== %s ==\n", __func__);
    
    FILE *fd_in = fopen("/tmp/test_encode_5_bytes_src", "wb+");
    assert(fd_in != NULL);
    const char *s = "\xAB\xCD\xEF\xAB\xCD";
    fwrite(s, 1, strlen(s), fd_in);
    fclose(fd_in);
    
    FILE *fd_out = fopen("/tmp/test_encode_5_bytes", "wb+");
    assert(fd_out != NULL);
    
    FILE *fd_in2 = fopen("/tmp/test_encode_5_bytes_src", "rb");
    assert(fd_in2 != NULL);
    
    int error = unibinary_encode(fd_in2, fd_out, 0);
    assert(error == 0);
    
    fclose(fd_in2);
    fclose(fd_out);
    
    /**/
    
    FILE *fd_in3 = fopen("/tmp/test_encode_5_bytes", "rb");
    assert(fd_in3 != NULL);
    
    wchar_t ctrl0 = fgetwc(fd_in3);
    wchar_t ctrl1 = fgetwc(fd_in3);
    wchar_t ctrl2 = fgetwc(fd_in3);
    wchar_t ctrl3 = fgetwc(fd_in3);
    
    // fgetwc() signals end of file with WEOF, not EOF
    wint_t trailing = fgetwc(fd_in3);
    assert(trailing == WEOF);
    
    // The conversions run outside assert() so they still happen when
    // assertions are compiled out with NDEBUG.
    wchar_t file0 = 0, file1 = 0, file2 = 0, file3 = 0;
    
    int rc0 = to_U12(0xABC, &file0);
    int rc1 = to_U12(0xDEF, &file1);
    int rc2 = to_U08(0xAB, &file2);
    int rc3 = to_U08(0xCD, &file3);
    assert(rc0 == 0);
    assert(rc1 == 0);
    assert(rc2 == 0);
    assert(rc3 == 0);
    
    assert(ctrl0 == file0);
    assert(ctrl1 == file1);
    assert(ctrl2 == file2);
    assert(ctrl3 == file3);
    
    fclose(fd_in3);
}
265 | 
// Round-trips several values through to_U12() and from_U12b() and checks
// that each one comes back unchanged.
void test_unichr_12_encoding_decoding(void) {
    
    printf("== %s ==\n", __func__);
    
    const wchar_t unichr[] = {0x0, 0x1, 0xAB, 0x123, 0xABC, 0xF, 0xFF, 0xFFF};
    
    // Derived from the array so the two can never disagree.
    const size_t count = sizeof(unichr) / sizeof(unichr[0]);
    
    for(size_t i = 0; i < count; i++) {
        // The conversions run outside assert() so they still happen when
        // assertions are compiled out with NDEBUG.
        wchar_t o = 0;
        int rc_to = to_U12(unichr[i], &o);
        assert(rc_to == 0);
        
        wchar_t o2 = 0;
        int rc_from = from_U12b(o, &o2);
        assert(rc_from == 0);
        
        assert(unichr[i] == o2);
    }
}
284 | 
// Checks is_in_U08b() at both edges of 0x0400..0x04FF: the two edge values
// are accepted and their outer neighbours are rejected.
void test_is_in_U8b(void) {
    
    printf("== %s ==\n", __func__);
    
    const wchar_t below = 0x03FF;
    const wchar_t first = 0x0400;
    const wchar_t last  = 0x04FF;
    const wchar_t above = 0x0500;
    
    assert(is_in_U08b(below) == 0);
    
    assert(is_in_U08b(first) != 0);
    assert(is_in_U08b(last) != 0);
    
    assert(is_in_U08b(above) == 0);
}
296 | 
// Writes to_U12(0xABC) and to_U12(0xDEF) to a file, decodes that file with
// unibinary_decode() and checks that the output is the bytes 0xAB 0xCD 0xEF.
void test_decode_unichars(void) {
    
    printf("== %s ==\n", __func__);
    
    // The conversions run outside assert() so they still happen when
    // assertions are compiled out with NDEBUG.
    wchar_t u0 = 0, u1 = 0;
    int rc0 = to_U12(0xABC, &u0);
    int rc1 = to_U12(0xDEF, &u1);
    assert(rc0 == 0);
    assert(rc1 == 0);
    
    FILE *fd_in = fopen("/tmp/test_decode_unichars_in", "wb+");
    assert(fd_in != NULL);
    fputwc(u0, fd_in);
    fputwc(u1, fd_in);
    fclose(fd_in);
    
    /**/
    
    FILE *fd_in2 = fopen("/tmp/test_decode_unichars_in", "rb");
    assert(fd_in2 != NULL);
    
    FILE *fd_out = fopen("/tmp/test_decode_unichars_out", "wb+");
    assert(fd_out != NULL);
    
    int status = unibinary_decode(fd_in2, fd_out);
    assert(status == EXIT_SUCCESS);
    
    fclose(fd_in2);
    fclose(fd_out);
    
    /**/
    
    FILE *fd_out2 = fopen("/tmp/test_decode_unichars_out", "rb");
    assert(fd_out2 != NULL);
    
    // Read outside assert() so the stream position advances under NDEBUG too.
    int b0 = fgetc(fd_out2);
    int b1 = fgetc(fd_out2);
    int b2 = fgetc(fd_out2);
    int end = fgetc(fd_out2);
    
    assert(b0 == 0xAB);
    assert(b1 == 0xCD);
    assert(b2 == 0xEF);
    assert(end == EOF);
    
    fclose(fd_out2);
}
336 | 
// Encodes /bin/date to /tmp/date.txt, decodes that file again and checks with
// compareFiles() that the result is identical to the original.
// The test is skipped when /bin/date cannot be opened.
void test_encode_decode_file(void) {
    
    printf("== %s ==\n", __func__);

    const char *file_name = "/bin/date";

    {
        FILE *fd_in = fopen(file_name, "rb");
        
        if(fd_in == NULL) {
            fprintf(stderr, "-- skip this test, cannot open file to encode: %s\n", file_name);
            // Actually skip: continuing would hand a NULL stream to the encoder.
            return;
        }
        
        FILE *fd_out = fopen("/tmp/date.txt", "wb+");
        assert(fd_out != NULL);
        
        int status = unibinary_encode(fd_in, fd_out, 0);
        assert(status == EXIT_SUCCESS);
        
        fclose(fd_out);
        fclose(fd_in);
    }
    
    /**/
    
    {
        FILE *fd_in = fopen("/tmp/date.txt", "rb");
        assert(fd_in != NULL);
        
        FILE *fd_out = fopen("/tmp/date_encoded_decoded", "wb+");
        assert(fd_out != NULL);
        
        int status = unibinary_decode(fd_in, fd_out);
        assert(status == EXIT_SUCCESS);
        
        fclose(fd_out);
        fclose(fd_in);
    }
    
    int error = compareFiles(file_name, "/tmp/date_encoded_decoded");
    assert(error == 0);
}
377 | 
// Encodes 0xAB 0xCD 0xEF, four 0xFF bytes and one 0x00 byte with
// unibinary_encode() and checks the exact sequence of wide characters
// written to the output file.
void test_repeats_2(void) {
    
    printf("== %s ==\n", __func__);
    
    FILE *fd_in = fopen("/tmp/test_repeats_2_src", "wb+");
    assert(fd_in != NULL);
    fputc(0xAB, fd_in);
    fputc(0xCD, fd_in);
    fputc(0xEF, fd_in);
    fputc(0xFF, fd_in);
    fputc(0xFF, fd_in);
    fputc(0xFF, fd_in);
    fputc(0xFF, fd_in);
    fputc(0x00, fd_in);
    
    fclose(fd_in);
    
    /**/
    
    FILE *fd_out = fopen("/tmp/test_repeats_2", "wb+");
    assert(fd_out != NULL);
    
    FILE *fd_in2 = fopen("/tmp/test_repeats_2_src", "rb");
    assert(fd_in2 != NULL);
    
    int status = unibinary_encode(fd_in2, fd_out, 0);
    assert(status == EXIT_SUCCESS);
    
    fclose(fd_in2);
    fclose(fd_out);
    
    /**/
    
    // Read from the stream opened here; fd_in was closed above and must not
    // be used again.
    FILE *fd_in3 = fopen("/tmp/test_repeats_2", "rb");
    assert(fd_in3 != NULL);
    
    static const wint_t expected[] = { 0x58BC, 0x5BEF, 0x04FF, 0x4E04, 0x0400 };
    
    for (size_t i = 0; i < sizeof(expected) / sizeof(expected[0]); i++) {
        wint_t wc = fgetwc(fd_in3);
        assert(wc == expected[i]);
    }
    
    wint_t trailing = fgetwc(fd_in3);
    assert(trailing == WEOF);
    
    fclose(fd_in3);
}
421 | 
// Encodes the single byte 'a' with unibinary_encode() and checks that the
// output file holds exactly the wide character 0x0461.
void test_one_char(void) {
    
    printf("== %s ==\n", __func__);
    
    FILE *fd_in = fopen("/tmp/test_one_char_src", "wb+");
    assert(fd_in != NULL);
    fputc('a', fd_in);
    fclose(fd_in);
    
    /**/
    
    FILE *fd_out = fopen("/tmp/test_one_char", "wb+");
    assert(fd_out != NULL);
    
    FILE *fd_in2 = fopen("/tmp/test_one_char_src", "rb");
    assert(fd_in2 != NULL);
    
    int status = unibinary_encode(fd_in2, fd_out, 0);
    assert(status == EXIT_SUCCESS);
    
    fclose(fd_in2);
    fclose(fd_out);
    
    /**/
    
    // Read from the stream opened here; fd_in was closed above and must not
    // be used again.
    FILE *fd_in3 = fopen("/tmp/test_one_char", "rb");
    assert(fd_in3 != NULL);
    
    wint_t wc = fgetwc(fd_in3);
    assert(wc == 0x0461);
    
    wint_t trailing = fgetwc(fd_in3);
    assert(trailing == WEOF);
    
    fclose(fd_in3);
}
453 | 
// Encodes an empty file with unibinary_encode() and checks that the output
// file is empty as well.
void test_empty_string(void) {
    
    printf("== %s ==\n", __func__);
    
    // create an empty source file
    FILE *fd_in = fopen("/tmp/test_empty_string_src", "wb+");
    assert(fd_in != NULL);
    fclose(fd_in);
    
    /**/
    
    FILE *fd_out = fopen("/tmp/test_empty_string", "wb+");
    assert(fd_out != NULL);
    
    FILE *fd_in2 = fopen("/tmp/test_empty_string_src", "rb");
    assert(fd_in2 != NULL);
    
    int status = unibinary_encode(fd_in2, fd_out, 0);
    assert(status == EXIT_SUCCESS);
    
    fclose(fd_in2);
    fclose(fd_out);
    
    /**/
    
    // Read from the stream opened here; fd_in was closed above and must not
    // be used again.
    FILE *fd_in3 = fopen("/tmp/test_empty_string", "rb");
    assert(fd_in3 != NULL);
    
    wint_t first = fgetwc(fd_in3);
    assert(first == WEOF);
    
    fclose(fd_in3);
}
480 | 
// Checks the two wide characters produced by
// two_unichr_to_repeat_byte_ntimes() for the byte 'a' repeated 10 times.
void test_two_unichr_to_repeat_byte_ntimes(void) {
    
    printf("== %s ==\n", __func__);
    
    const unsigned char repeated_byte = 'a';
    const int repeat_count = 10;
    
    wchar_t byte_char, count_char;
    int rc = two_unichr_to_repeat_byte_ntimes(repeated_byte, repeat_count, &byte_char, &count_char);
    assert(rc == EXIT_SUCCESS);
    
    assert(byte_char == 0x0461);
    assert(count_char == 0x4E0A);
}
492 | 
// Encodes 0x2000 - 2 copies of the byte 0xAA with unibinary_encode() and
// checks that the output file holds exactly the pair 0x04AA, 0x5DFF twice.
void test_big_repeats_2000_minus_2(void) {
    
    printf("== %s ==\n", __func__);
    
    FILE *fd_in = fopen("/tmp/test_big_repeats_2000_minus_2_src", "wb+");
    assert(fd_in != NULL);
    size_t COUNT = 0x2000 - 2;
    size_t i = 0;
    for(i = 0; i < COUNT; i++) {
        fputc(0xAA, fd_in);
    }
    fclose(fd_in);
    
    /**/
    
    FILE *fd_out = fopen("/tmp/test_big_repeats_2000_minus_2", "wb+");
    assert(fd_out != NULL);
    
    FILE *fd_in2 = fopen("/tmp/test_big_repeats_2000_minus_2_src", "rb");
    assert(fd_in2 != NULL);
    
    int status = unibinary_encode(fd_in2, fd_out, 0);
    assert(status == EXIT_SUCCESS);
    
    fclose(fd_in2);
    fclose(fd_out);
    
    /**/
    
    // Read from the stream opened here; fd_in was closed above and must not
    // be used again.
    FILE *fd_in3 = fopen("/tmp/test_big_repeats_2000_minus_2", "rb");
    assert(fd_in3 != NULL);
    
    static const wint_t expected[] = { 0x04AA, 0x5DFF, 0x04AA, 0x5DFF };
    
    for (size_t k = 0; k < sizeof(expected) / sizeof(expected[0]); k++) {
        wint_t wc = fgetwc(fd_in3);
        assert(wc == expected[k]);
    }
    
    wint_t trailing = fgetwc(fd_in3);
    assert(trailing == WEOF);
    
    fclose(fd_in3);
}
531 | 
/*
 * Encodes 0x2000 copies of the byte 0xAA and checks the output: two
 * (byte, count) pairs with the count 0xFFF, then the two leftover bytes
 * encoded as two single 0x04AA code points.
 *
 * The encoded file is read back through fd_in3; fd_in is already closed by
 * then and must not be used.
 */
void test_big_repeats_2000(void) {
    
    printf("== %s ==\n", __func__);
    
    /* 1. create the source file */
    
    FILE *fd_in = fopen("/tmp/test_big_repeats_2000_src", "wb+");
    assert(fd_in != NULL);
    size_t COUNT = 0x2000;
    size_t i = 0;
    for(i = 0; i < COUNT; i++) {
        fputc(0xAA, fd_in);
    }
    fclose(fd_in);
    
    /* 2. encode it */
    
    FILE *fd_out = fopen("/tmp/test_big_repeats_2000", "wb+");
    assert(fd_out != NULL);
    
    FILE *fd_in2 = fopen("/tmp/test_big_repeats_2000_src", "r");
    assert(fd_in2 != NULL);
    
    int status = unibinary_encode(fd_in2, fd_out, 0);
    assert(status == EXIT_SUCCESS);
    
    fclose(fd_in2);
    fclose(fd_out);
    
    /* 3. read the encoded file back */
    
    FILE *fd_in3 = fopen("/tmp/test_big_repeats_2000", "rb");
    assert(fd_in3 != NULL);
    
    assert(fgetwc(fd_in3) == 0x04AA);
    assert(fgetwc(fd_in3) == 0x5DFF);
    assert(fgetwc(fd_in3) == 0x04AA);
    assert(fgetwc(fd_in3) == 0x5DFF);
    assert(fgetwc(fd_in3) == 0x04AA);
    assert(fgetwc(fd_in3) == 0x04AA);
    
    assert(fgetwc(fd_in3) == WEOF);
    
    fclose(fd_in3);
}
572 | 
/*
 * Encodes the 8 bytes CF FA ED FE 07 00 00 01 and checks the five code
 * points written: four 12-bit code points for the first six bytes, then one
 * code point for the trailing ASCII pair 00 01.
 */
void test_encode_macho_header(void) {
    
    printf("== %s ==\n", __func__);
    
    /* 1. create the source file */
    
    FILE *fd_in = fopen("/tmp/test_encode_macho_header_src", "wb+");
    assert(fd_in != NULL);
    //    const char* s = "\xCF\xFA\xED\xFE\x07\x00\x00\x01";
    fputc(0xCF, fd_in);
    fputc(0xFA, fd_in);
    fputc(0xED, fd_in);
    fputc(0xFE, fd_in);
    fputc(0x07, fd_in);
    fputc(0x00, fd_in);
    fputc(0x00, fd_in);
    fputc(0x01, fd_in);
    fclose(fd_in);
    
    /* 2. encode it */
    
    FILE *fd_out = fopen("/tmp/test_encode_macho_header", "wb+");
    assert(fd_out != NULL);
    
    FILE *fd_in2 = fopen("/tmp/test_encode_macho_header_src", "r");
    assert(fd_in2 != NULL);
    
    int status = unibinary_encode(fd_in2, fd_out, 0);
    assert(status == EXIT_SUCCESS);
    
    fclose(fd_in2);
    fclose(fd_out);
    
    /* 3. read the encoded file back */
    
    FILE *fd_in3 = fopen("/tmp/test_encode_macho_header", "rb");
    assert(fd_in3 != NULL);
    
    assert(fgetwc(fd_in3) == 0x5AFF);
    assert(fgetwc(fd_in3) == 0x58ED);
    assert(fgetwc(fd_in3) == 0x5DE0);
    assert(fgetwc(fd_in3) == 0x5500);
    assert(fgetwc(fd_in3) == 0x5E01);
    
    assert(fgetwc(fd_in3) == WEOF);
    
    fclose(fd_in3);
}
612 | 
/*
 * Writes the code points 0x9B25 0x9AF4 to a file, decodes that file and
 * checks that the decoded bytes are exactly "test".
 */
void test_decode_bytes_from_string_4_ascii(void) {
    
    printf("== %s ==\n", __func__);
    
    /* 1. write the encoded input */
    
    const wchar_t encoded[2] = {0x9b25, 0x9af4};
    
    FILE *encoded_w = fopen("/tmp/test_decode_bytes_from_string_4_ascii_in", "wb+");
    for (size_t k = 0; k < 2; k++) {
        fputwc(encoded[k], encoded_w);
    }
    fclose(encoded_w);
    
    /* 2. decode it */
    
    FILE *encoded_r = fopen("/tmp/test_decode_bytes_from_string_4_ascii_in", "rb+");
    assert(encoded_r != NULL);
    
    FILE *decoded_w = fopen("/tmp/test_decode_bytes_from_string_4_ascii_out", "wb+");
    assert(decoded_w != NULL);
    
    assert(unibinary_decode(encoded_r, decoded_w) == EXIT_SUCCESS);
    
    fclose(encoded_r);
    fclose(decoded_w);
    
    /* 3. compare the decoded bytes with the expected text */
    
    FILE *decoded_r = fopen("/tmp/test_decode_bytes_from_string_4_ascii_out", "rb");
    assert(decoded_r != NULL);
    
    const char *expected = "test";
    for (const char *p = expected; *p != '\0'; p++) {
        assert(fgetc(decoded_r) == *p);
    }
    assert(fgetc(decoded_r) == EOF);
    
    fclose(decoded_r);
}
649 | 
/*
 * Checks that the code point pair (0x58BC, 0x5BEF) decodes to the three
 * bytes AB CD EF.
 */
void test_3_bytes_from_unichar(void) {
    
    printf("== %s ==\n", __func__);
    
    const wchar_t first = 0x58BC;
    const wchar_t second = 0x5BEF;
    
    uint8_t decoded[3];
    
    assert(three_bytes_from_unichars(first, second, &decoded[0], &decoded[1], &decoded[2]) == EXIT_SUCCESS);
    
    assert(decoded[0] == 0xAB);
    assert(decoded[1] == 0xCD);
    assert(decoded[2] == 0xEF);
}
665 | 
/*
 * Encodes the three bytes "xxx" and checks that the output is one
 * (byte, count) pair: 0x0478 followed by 0x4E03.
 *
 * The encoded file is read back through fd_in3; fd_in is already closed by
 * then and must not be used.
 */
void test_repeat(void) {
    
    printf("== %s ==\n", __func__);
    
    /* 1. create the source file */
    
    FILE *fd_in = fopen("/tmp/test_repeat_src", "wb+");
    assert(fd_in != NULL);
    size_t written = fwrite("xxx", sizeof(char), 3, fd_in);
    assert(written == 3);
    fclose(fd_in);
    
    /* 2. encode it */
    
    FILE *fd_out = fopen("/tmp/test_repeat", "wb+");
    assert(fd_out != NULL);
    
    FILE *fd_in2 = fopen("/tmp/test_repeat_src", "r");
    assert(fd_in2 != NULL);
    
    int status = unibinary_encode(fd_in2, fd_out, 0);
    assert(status == EXIT_SUCCESS);
    
    fclose(fd_in2);
    fclose(fd_out);
    
    /* 3. read the encoded file back */
    
    FILE *fd_in3 = fopen("/tmp/test_repeat", "rb");
    assert(fd_in3 != NULL);
    
    assert(fgetwc(fd_in3) == 0x0478);
    assert(fgetwc(fd_in3) == 0x4E03);
    
    assert(fgetwc(fd_in3) == WEOF);
    
    fclose(fd_in3);
}
698 | 
/*
 * Checks that unibinary_encode_string() turns "test" into the two code
 * points U+9B25 U+9AF4.
 */
void test_string_encoding(void) {
    printf("== %s ==\n", __func__);

    // start from NULL so a failed call cannot leave a garbage pointer behind
    wchar_t *wcs = NULL;
    int status = unibinary_encode_string("test", &wcs, 0);
    assert(status == EXIT_SUCCESS);
    assert(wcs != NULL);
    
    assert(wcscmp(wcs, L"\u9B25\u9AF4") == 0);
    free(wcs);
}
708 | 
/*
 * Checks that unibinary_decode_string() turns U+9B25 U+9AF4 back into the
 * four bytes "test".
 */
void test_string_decoding(void) {
    printf("== %s ==\n", __func__);

    char* data = NULL;
    long dst_len = 0;
    int status = unibinary_decode_string(L"\u9B25\u9AF4", &data, &dst_len);
    assert(status == EXIT_SUCCESS);
    assert(data != NULL);
    assert(dst_len == 4);
    // compare exactly dst_len bytes: decoded data is binary and need not be NUL-terminated
    assert(memcmp("test", data, 4) == 0);
    free(data);
}
719 | 
/*
 * Checks that a newline between the two code points is ignored by
 * unibinary_decode_string(): the result is still the four bytes "test".
 */
void test_string_decoding_with_newline(void) {
    printf("== %s ==\n", __func__);
    
    char* data = NULL;
    long dst_len = 0;
    int status = unibinary_decode_string(L"\u9B25\n\u9AF4", &data, &dst_len);
    assert(status == EXIT_SUCCESS);
    assert(data != NULL);
    assert(dst_len == 4);
    // compare exactly dst_len bytes: decoded data is binary and need not be NUL-terminated
    assert(memcmp("test", data, 4) == 0);
    free(data);
}
730 | 
/*
 * Test runner: selects the LC_CTYPE locale, then runs every test in
 * sequence. A failing test aborts through assert(), so the final message is
 * only printed when all of them passed.
 */
int main(int argc, const char * argv[]) {
    
    (void)argc;
    (void)argv;
    
    // Try "UTF-8" first, then the environment's locale. If the second call
    // fails, the result of the first one stays in effect.
    setlocale(LC_CTYPE, "UTF-8");
    setlocale(LC_CTYPE, "");
    
    // Query the locale actually in effect; never hand a NULL pointer to %s.
    const char *current_locale = setlocale(LC_CTYPE, NULL);
    printf("The current locale is %s.\n", current_locale != NULL ? current_locale : "(unknown)");
    
    test_empty_string();
    test_one_char();
    test_repeats_2();
    test_unichr_12_encoding_decoding();
    testRepeats();
    test_7_7_to_12();
    test_8_8_8_to_12_12();
    test_encode_3_bytes();
    test_encode_5_bytes();
    test_is_in_U8b();
    test_ascii_encoding();
    test_two_unichr_to_repeat_byte_ntimes();
    test_encode_macho_header();
    test_decode_bytes_from_string_4_ascii();
    test_3_bytes_from_unichar();
    test_decode_unichars();
    test_big_repeats_2000_minus_2();
    test_big_repeats_2000();
    test_repeat();
    test_encode_decode_file();
    test_string_encoding();
    test_string_decoding();
    test_string_decoding_with_newline();

    printf("-- ALL TESTS ARE OK --\n");
    
    return 0;
}
766 | 


--------------------------------------------------------------------------------
/c/unibinary/unibinary.1:
--------------------------------------------------------------------------------
 1 | .\"Modified from man(1) of FreeBSD, the NetBSD mdoc.template, and mdoc.samples.
 2 | .\"See Also:
 3 | .\"man mdoc.samples for a complete listing of options
 4 | .\"man mdoc for the short list of editing options
 5 | .\"/usr/share/misc/mdoc.template
 6 | .Dd December 24, 2013      \" DATE 
 7 | .Dt unibinary 1      \" Program name and manual section number 
 8 | .Os Darwin
 9 | .Sh NAME                 \" Section Header - required - don't modify 
10 | .Nm unibinary,
11 | .\" The following lines are read in generating the apropos(man -k) database. Use only key
12 | .\" words here as the database is built based on the words here and in the .ND line. 
13 | .Nm Other_name_for_same_program(),
14 | .Nm Yet another name for the same program.
15 | .\" Use .Nm macro to designate other names for the documented program.
16 | .Nd This line parsed for whatis database.
17 | .Sh SYNOPSIS             \" Section Header - required - don't modify
18 | .Nm
19 | .Op Fl abcd              \" [-abcd]
20 | .Op Fl a Ar path         \" [-a path] 
21 | .Op Ar file              \" [file]
22 | .Op Ar                   \" [file ...]
23 | .Ar arg0                 \" Underlined argument - use .Ar anywhere to underline
24 | arg2 ...                 \" Arguments
25 | .Sh DESCRIPTION          \" Section Header - required - don't modify
26 | Use the .Nm macro to refer to your program throughout the man page like such:
27 | .Nm
28 | Underlining is accomplished with the .Ar macro like this:
29 | .Ar underlined text .
30 | .Pp                      \" Inserts a space
31 | A list of items with descriptions:
32 | .Bl -tag -width -indent  \" Begins a tagged list 
33 | .It item a               \" Each item preceded by .It macro
34 | Description of item a
35 | .It item b
36 | Description of item b
37 | .El                      \" Ends the list
38 | .Pp
39 | A list of flags and their descriptions:
40 | .Bl -tag -width -indent  \" Differs from above in tag removed 
41 | .It Fl a                 \"-a flag as a list item
42 | Description of -a flag
43 | .It Fl b
44 | Description of -b flag
45 | .El                      \" Ends the list
46 | .Pp
47 | .\" .Sh ENVIRONMENT      \" May not be needed
48 | .\" .Bl -tag -width "ENV_VAR_1" -indent \" ENV_VAR_1 is width of the string ENV_VAR_1
49 | .\" .It Ev ENV_VAR_1
50 | .\" Description of ENV_VAR_1
51 | .\" .It Ev ENV_VAR_2
52 | .\" Description of ENV_VAR_2
53 | .\" .El                      
54 | .Sh FILES                \" File used or created by the topic of the man page
55 | .Bl -tag -width "/Users/joeuser/Library/really_long_file_name" -compact
56 | .It Pa /usr/share/file_name
57 | FILE_1 description
58 | .It Pa /Users/joeuser/Library/really_long_file_name
59 | FILE_2 description
60 | .El                      \" Ends the list
61 | .\" .Sh DIAGNOSTICS       \" May not be needed
62 | .\" .Bl -diag
63 | .\" .It Diagnostic Tag
 64 | .\" Diagnostic information here.
65 | .\" .It Diagnostic Tag
 66 | .\" Diagnostic information here.
67 | .\" .El
68 | .Sh SEE ALSO 
69 | .\" List links in ascending order by section, alphabetically within a section.
70 | .\" Please do not reference files that do not exist without filing a bug report
71 | .Xr a 1 , 
72 | .Xr b 1 ,
73 | .Xr c 1 ,
74 | .Xr a 2 ,
75 | .Xr b 2 ,
76 | .Xr a 3 ,
77 | .Xr b 3 
78 | .\" .Sh BUGS              \" Document known, unremedied bugs 
79 | .\" .Sh HISTORY           \" Document history if command behaves in a unique manner


--------------------------------------------------------------------------------
/c/unibinary/unibinary.c:
--------------------------------------------------------------------------------
  1 | //
  2 | //  main.c
  3 | //  unibinary
  4 | //
  5 | //  Created by Nicolas Seriot on 24/12/13.
  6 | //  Copyright (c) 2013 Nicolas Seriot. All rights reserved.
  7 | //
  8 | 
  9 | #include "unibinary.h"
 10 | #include <string.h>
 11 | #include <stdlib.h>
 12 | #include <sys/mman.h>
 13 | 
// Code point ranges used by the encoding. Each *_start value is the first
// code point of a range and the matching *_length is the number of code
// points in that range.

// encodes ascii 7-bits characters
// One code point stores two 7-bit characters as 2 x 6 bits. The range is
// chosen by whether each character is >= 64 (see unichr_12a_from_two_ascii):
// 0_0 = neither, 0_1 = second only, 1_0 = first only, 1_1 = both.
wchar_t U12a_0_0_start = 0x5E00; // CJK Unified Ideographs (subset) - encodes 12 bits (2 ascii) - MSB 0,0
wchar_t U12a_0_1_start = 0x6E00; // CJK Unified Ideographs (subset) - encodes 12 bits (2 ascii) - MSB 0,1
wchar_t U12a_1_0_start = 0x7E00; // CJK Unified Ideographs (subset) - encodes 12 bits (2 ascii) - MSB 1,0
wchar_t U12a_1_1_start = 0x8E00; // CJK Unified Ideographs (subset) - encodes 12 bits (2 ascii) - MSB 1,1
wchar_t U12a_length = 0x1000;    // size of each of the four U12a ranges

// encodes arbitrary bits
// U12b carries a raw 12-bit value (also used as a repeat count); U8 carries
// a single byte.
wchar_t U12b_start = 0x4E00; // CJK Unified Ideographs (subset) - encodes 12 bits
wchar_t U12b_length = 0x1000;
wchar_t U8_start = 0x0400;   // Cyrillic                        - encodes 8 bits
wchar_t U8_length = 0x0100;
 26 | 
 27 | int is_in_U08b(wchar_t i) {
 28 |     return i >= U8_start && i < (U8_start + U8_length);
 29 | }
 30 | 
 31 | int is_in_U12a(wchar_t u) {
 32 |     
 33 |     wchar_t starts[4] = {U12a_0_0_start, U12a_0_1_start, U12a_1_0_start, U12a_1_1_start};
 34 |     
 35 |     for (int i = 0; i < 4; i++) {
 36 |         wchar_t start = starts[i];
 37 |         if(u >= start && u < (start + U12a_length)) {
 38 |             return 1;
 39 |         }
 40 |     }
 41 |     
 42 |     return EXIT_SUCCESS;
 43 | }
 44 | 
 45 | int is_in_U12b(wchar_t u) {
 46 |     return u >= U12b_start && u < (U12b_start + U12b_length);
 47 | }
 48 | 
 49 | int two_unichr_to_repeat_byte_ntimes(unsigned char c, int n, wchar_t *u0, wchar_t *u1) {
 50 |     
 51 |     if (c > 0xFF) return EXIT_FAILURE;
 52 |     if (n > 0xFFF) return EXIT_FAILURE;
 53 |     
 54 |     *u0 = U8_start + c;
 55 |     *u1 = U12b_start + n;
 56 |     
 57 |     return EXIT_SUCCESS;
 58 | }
 59 | 
 60 | int two_twelve_bits_values_from_three_bytes(unsigned char c0, unsigned char c1, unsigned char c2, wchar_t *u0, wchar_t *u1) {
 61 |     
 62 |     // (0x12, 0x34, 0x56) -> (0x123, 0x456)
 63 |     
 64 |     if (c0 > 0xFF || c1 > 0xFF || c2 > 0xFF) return EXIT_FAILURE;
 65 |     
 66 |     *u0 = (c0 << 4) + (c1 >> 4);
 67 |     *u1 = ((c1 & 0xF) << 8) + c2;
 68 |     
 69 |     return EXIT_SUCCESS;
 70 | }
 71 | 
 72 | int unichr_12a_from_two_ascii(unsigned char c0, unsigned char c1, wchar_t *u0) {
 73 |     
 74 |     int unicode_start = 0;
 75 |     
 76 |     if (c0 < 64 && c1 < 64) {
 77 |         unicode_start = U12a_0_0_start;
 78 |     } else if (c0 < 64 && c1 >= 64) {
 79 |         c1 -= 64;
 80 |         unicode_start = U12a_0_1_start;
 81 |     } else if (c0 >= 64 && c1 < 64) {
 82 |         c0 -= 64;
 83 |         unicode_start = U12a_1_0_start;
 84 |     } else if (c0 >= 64 && c1 >= 64) {
 85 |         c0 -= 64;
 86 |         c1 -= 64;
 87 |         unicode_start = U12a_1_1_start;
 88 |     }
 89 |     
 90 |     *u0 = unicode_start + (c0 << 6) + c1;
 91 |     
 92 |     return EXIT_SUCCESS;
 93 | }
 94 | 
 95 | int number_of_repeated_characters_at_index(const char* src, size_t i, size_t srcSize, int *n) {
 96 |     
 97 |     int repeats_count = 0;
 98 |     
 99 |     unsigned char c = src[i];
100 |     
101 |     while(i < srcSize && (c == (unsigned char)src[i])) {
102 |         repeats_count += 1;
103 |         i++;
104 |     }
105 |     
106 |     *n = repeats_count;
107 |     
108 |     return EXIT_SUCCESS;
109 | }
110 | 
111 | int to_U08(uint8_t i, wchar_t *o) {
112 |     
113 |     if (i > (U8_start + U8_length)) return EXIT_FAILURE;
114 |     
115 |     *o = U8_start + i;
116 |     
117 |     return EXIT_SUCCESS;
118 | }
119 | 
120 | int to_U12(wchar_t i, wchar_t *o) {
121 |     
122 |     if(i > (U12b_start + U12b_length)) return EXIT_FAILURE;
123 |     
124 |     *o = U12b_start + i;
125 |     
126 |     return EXIT_SUCCESS;
127 | }
128 | 
129 | int from_U12b(wchar_t i, wchar_t *o) {
130 |     
131 |     if(i < U12b_start || i > (U12b_start + U12b_length)) return EXIT_FAILURE;
132 |     
133 |     *o = i - U12b_start;
134 |     
135 |     return EXIT_SUCCESS;
136 | }
137 | 
138 | int U12a_to_8_8(wchar_t u, uint8_t *b0, uint8_t *b1) {
139 |     
140 |     wchar_t unicode_start = 0;
141 |     wchar_t starts[4] = {U12a_0_0_start, U12a_0_1_start, U12a_1_0_start, U12a_1_1_start};
142 |     
143 |     for (int i = 0; i < 4; i++) {
144 |         wchar_t start = starts[i];
145 |         if(u >= start && u < (start + U12a_length)) {
146 |             unicode_start = start;
147 |             break;
148 |         }
149 |     }
150 |     
151 |     if(unicode_start == 0) return EXIT_FAILURE;
152 |     
153 |     wchar_t value = u - unicode_start;
154 |     *b0 = (value & 0xFC0) >> 6;
155 |     *b1 = u & 0x3F;
156 |     
157 |     if(unicode_start == U12a_0_0_start) {
158 |         // pass
159 |     } else if (unicode_start == U12a_0_1_start) {
160 |         *b1 += 64;
161 |     } else if (unicode_start == U12a_1_0_start) {
162 |         *b0 += 64;
163 |     } else if (unicode_start == U12a_1_1_start) {
164 |         *b0 += 64;
165 |         *b1 += 64;
166 |     }
167 |     
168 |     return EXIT_SUCCESS;
169 | }
170 | 
171 | int int_from_u08b(wchar_t u, uint8_t* i) {
172 |     if(u < U8_start || u > (U8_start + U8_length)) return EXIT_FAILURE;
173 |     
174 |     *i = u - U8_start;
175 |     
176 |     return EXIT_SUCCESS;
177 | }
178 | 
179 | int int_from_u12b(wchar_t u, wchar_t* i) {
180 |     if(u < U12b_start || u > (U12b_start + U12b_length)) return EXIT_FAILURE;
181 |     
182 |     *i = u - U12b_start;
183 |     
184 |     return EXIT_SUCCESS;
185 | }
186 | 
// Decodes a (byte, count) pair: u1 is a U8 code point giving the byte, u2 a
// U12b code point giving how many times it repeats.
// On success *dst is a malloc'ed buffer filled with the byte, *dstSize is
// its length, and the caller owns the buffer (free() it). A count of 0
// yields a valid buffer with *dstSize == 0.
// Returns EXIT_FAILURE on invalid code points, NULL outputs or allocation failure.
int repeated_bytes_from_unichars(wchar_t u1, wchar_t u2, uint8_t **dst, size_t *dstSize) {
    
    if(dst == NULL || dstSize == NULL) return EXIT_FAILURE;
    
    uint8_t b;
    if(int_from_u08b(u1, &b) != 0) return EXIT_FAILURE;
    
    wchar_t n;
    if(int_from_u12b(u2, &n) != 0) return EXIT_FAILURE;
    
    // any bit outside the low 12 means the count is too large or negative
    if((n & ~0xFFF) != 0) {
        fprintf(stderr, "-- bad number of repeats: 0x%x\n", (unsigned int)n);
        return EXIT_FAILURE;
    }
    
    size_t count = (size_t)n;
    
    // malloc(0) may legitimately return NULL; always request at least one
    // byte so that NULL only ever means "out of memory"
    uint8_t *out = malloc((count > 0 ? count : 1) * sizeof(uint8_t));
    if(out == NULL) {
        return EXIT_FAILURE;
    }
    
    memset(out, b, count);
    
    *dstSize = count;
    *dst = out;
    
    return EXIT_SUCCESS;
}
212 | 
// Repacks two 12-bit values into three bytes:
// (0x123, 0x456) -> (0x12, 0x34, 0x56)
// Returns EXIT_FAILURE (outputs untouched) when either value does not fit
// in 12 bits or an output pointer is NULL.
int three_bytes_from_two_twelve_bits_values(wchar_t i1, wchar_t i2, uint8_t *b1, uint8_t *b2, uint8_t *b3) {
    
    if(b1 == NULL || b2 == NULL || b3 == NULL) return EXIT_FAILURE;
    
    // Testing the bits above the low 12 rejects values that are too large
    // and, where wchar_t is signed, negative values as well.
    if((i1 & ~0xFFF) != 0) return EXIT_FAILURE;
    if((i2 & ~0xFFF) != 0) return EXIT_FAILURE;
    
    *b1 = i1 >> 4;
    *b2 = ((i1 & 0xF) << 4) + ((i2 & 0xF00) >> 8);
    *b3 = (i2 & 0x0FF);
    
    return EXIT_SUCCESS;
}
225 | 
// Decodes two U12b code points into the three bytes they encode.
// Returns EXIT_FAILURE as soon as one of the conversion steps fails.
int three_bytes_from_unichars(wchar_t u1, wchar_t u2, uint8_t *b1, uint8_t *b2, uint8_t *b3) {
    
    wchar_t first12;
    wchar_t second12;
    
    // u2 is only converted when u1 converted successfully
    if(int_from_u12b(u1, &first12) != 0 || int_from_u12b(u2, &second12) != 0) {
        return EXIT_FAILURE;
    }
    
    if(three_bytes_from_two_twelve_bits_values(first12, second12, b1, b2, b3) != 0) {
        return EXIT_FAILURE;
    }
    
    return EXIT_SUCCESS;
}
238 | 
// Decodes two U8 code points into the two bytes they encode.
// u2 is only converted when u1 converted successfully.
int two_bytes_from_unichars(wchar_t u1, wchar_t u2, uint8_t *b1, uint8_t *b2) {
    
    if(int_from_u08b(u1, b1) == EXIT_FAILURE) {
        return EXIT_FAILURE;
    }
    
    if(int_from_u08b(u2, b2) == EXIT_FAILURE) {
        return EXIT_FAILURE;
    }
    
    return EXIT_SUCCESS;
}
249 | 
// Decodes a pair of code points into bytes. Supported pairs:
//   U12b + U12b  -> 3 bytes
//   U8   + U12b  -> the U8 byte repeated (U12b value) times
//   U8   + U8    -> 2 bytes
//   U8   + '\n'  -> 1 byte
// On success *buffer is a malloc'ed buffer owned by the caller (free() it)
// and *bufferSize is its length. On failure nothing is allocated and the
// outputs are left untouched.
int bytes_from_u1_u2(wchar_t u1, wchar_t u2, uint8_t **buffer, size_t *bufferSize) {
    
    if(buffer == NULL || bufferSize == NULL) return EXIT_FAILURE;
    
    int u1_in_U12b = is_in_U12b(u1);
    int u2_in_U12b = is_in_U12b(u2);
    
    int u1_in_U8b = is_in_U08b(u1);
    int u2_in_U8b = is_in_U08b(u2);
    
    if(u1_in_U12b && u2_in_U12b) {
        
        // convert first, allocate afterwards: nothing to free on failure
        uint8_t b0, b1, b2;
        
        int status = three_bytes_from_unichars(u1, u2, &b0, &b1, &b2);
        if(status != 0) return EXIT_FAILURE;
        
        uint8_t *out = malloc(3 * sizeof(uint8_t));
        if(out == NULL) {
            fprintf(stderr, "-- malloc error\n");
            return EXIT_FAILURE;
        }
        
        out[0] = b0;
        out[1] = b1;
        out[2] = b2;
        
        *bufferSize = 3;
        *buffer = out;
        
        return EXIT_SUCCESS;
        
    } else if (u1_in_U8b && u2_in_U12b) {
        
        size_t dstSize;
        
        uint8_t *out;
        
        int status = repeated_bytes_from_unichars(u1, u2, &out, &dstSize);
        if(status != 0) return EXIT_FAILURE;
        
        *bufferSize = dstSize;
        
        *buffer = out;
        
        return EXIT_SUCCESS;
    } else if (u1_in_U8b && u2_in_U8b) {
        
        uint8_t b0, b1;
        int status = two_bytes_from_unichars(u1, u2, &b0, &b1);
        if(status != 0) return EXIT_FAILURE;
        
        uint8_t *out = malloc(2 * sizeof(uint8_t));
        if(out == NULL) {
            fprintf(stderr, "-- malloc error\n");
            return EXIT_FAILURE;
        }
        
        out[0] = b0;
        out[1] = b1;
        
        *bufferSize = 2;
        *buffer = out;
        
        return EXIT_SUCCESS;
    } else if (u1_in_U8b && u2 == '\n') {
        
        uint8_t b0;
        
        int success = int_from_u08b(u1, &b0);
        if(success == EXIT_FAILURE) {
            fprintf(stderr, "-- error\n");
            return EXIT_FAILURE;
        }
        
        uint8_t *out = malloc(1 * sizeof(uint8_t));
        if(out == NULL) {
            fprintf(stderr, "-- malloc error\n");
            return EXIT_FAILURE;
        }
        
        out[0] = b0;
        
        *bufferSize = 1;
        *buffer = out;
        
        return EXIT_SUCCESS;
    }
    
    // %x expects an unsigned int; wchar_t is not guaranteed to be one
    fprintf(stderr, "-- bytes_from_u1_u2() cannot deal with u1:0x%x u2:0x%x\n", (unsigned int)u1, (unsigned int)u2);
    fprintf(stderr, "   u1 in U8b:%d U12b:%d, u2 in U8b:%d U12b:%d\n", u1_in_U8b, u1_in_U12b, u2_in_U8b, u2_in_U12b);
    return EXIT_FAILURE;
}
339 | 
// Reads wide characters from src until one that is not '\n' is found and
// stores it in *wc (this is whatever fgetwc() returned, end-of-file included).
// Always returns EXIT_SUCCESS.
int next_non_newline_char(FILE *src, wchar_t *wc) {
    for (;;) {
        *wc = fgetwc(src);
        if (*wc != '\n') {
            break;
        }
    }
    return EXIT_SUCCESS;
}
346 | 
// Decodes the wide-character stream src and writes the resulting bytes to dst.
//
// The loop keeps a look-ahead of two code points (c, c_next), newlines being
// skipped while reading. Each iteration consumes either one code point
// (a U12a code point holding two ASCII characters, or a trailing U8 code
// point holding one byte) or two (any pair bytes_from_u1_u2() understands).
//
// Returns EXIT_SUCCESS at end of input, EXIT_FAILURE on an undecodable code
// point or a write error.
int unibinary_decode(FILE *src, FILE *dst) {
    
    int i = 0; // number of code points consumed so far, for error messages
    
    wchar_t c, c_next;
    
    int fwd_chars_read = 0; // how many of (c, c_next) are read but not consumed yet
    
    while (1) {
        
        // refill the look-ahead so that both c and c_next are loaded
        if(fwd_chars_read == 0) {
            next_non_newline_char(src, &c);
            next_non_newline_char(src, &c_next);
            fwd_chars_read = 2;
        } else if (fwd_chars_read == 1) {
            c = c_next;
            next_non_newline_char(src, &c_next);
            fwd_chars_read = 2;
        } else {
            // pass
        }
        
        if(is_in_U12a(c)) {
            uint8_t b0;
            uint8_t b1;
            
            int error = U12a_to_8_8(c, &b0, &b1);
            if(error != 0) {
                fprintf(stderr, "-- error in U12a_to_8_8()\n");
                return EXIT_FAILURE;
            }
            
            if(fwrite(&b0, 1, 1, dst) != 1 || fwrite(&b1, 1, 1, dst) != 1) {
                fprintf(stderr, "-- write error\n");
                return EXIT_FAILURE;
            }
            
            i += 1;
            
            fwd_chars_read -= 1;
            
        } else if (c != WEOF && c_next != WEOF) {
            wchar_t u1 = c;
            wchar_t u2 = c_next;
            
            uint8_t *outBuffer = NULL;
            size_t outBufferSize = 0;
            
            int error = bytes_from_u1_u2(u1, u2, &outBuffer, &outBufferSize);
            if(error != 0) {
                fprintf(stderr, "-- error in bytes_from_u1_u2()\n");
                return EXIT_FAILURE;
            }
            
            if(outBuffer == NULL) {
                fprintf(stderr, "-- outBuffer == NULL\n");
                return EXIT_FAILURE;
            }
            
            size_t written = fwrite(outBuffer, 1, outBufferSize, dst);
            
            // the buffer was allocated by bytes_from_u1_u2() and is ours to release
            free(outBuffer);
            
            if(written != outBufferSize) {
                fprintf(stderr, "-- write error\n");
                return EXIT_FAILURE;
            }
            
            i += 2;
            
            fwd_chars_read -= 2;
            
        } else if (is_in_U08b(c)) {
            // last code point of the input: a single byte
            uint8_t b0;
            int status = int_from_u08b(c, &b0);
            if(status != 0) {
                fprintf(stderr, "-- error in int_from_u08b()\n");
                return EXIT_FAILURE;
            }
            
            if(fwrite(&b0, 1, 1, dst) != 1) {
                fprintf(stderr, "-- write error\n");
                return EXIT_FAILURE;
            }
            
            i += 1;
            
            fwd_chars_read -= 1;
            
        } else if (c == '\n') {
            i += 1;
            
            if(fwd_chars_read > 0) {
                fwd_chars_read -= 1;
            }
        } else if (c == WEOF) {
            break;
        } else {
            fprintf(stderr, "-- cannot decode character at index %d\n", i);
            return EXIT_FAILURE;
        }
    }
    
    return EXIT_SUCCESS;
}
439 | 
// Writes wc to fd_out and keeps track of line wrapping.
// *count is the number of characters written on the current line. When
// wrap_length > 0, *count is kept modulo wrap_length and a '\n' is written
// each time it comes back to 0. With wrap_length == 0 no newline is
// inserted.
// Returns EXIT_FAILURE when a write fails or count is NULL.
int put_wc(FILE *fd_out, wchar_t wc, size_t *count, size_t wrap_length) {
    if(count == NULL) return EXIT_FAILURE;
    
    // fputwc() reports errors with WEOF, not EOF
    if(fputwc(wc, fd_out) == WEOF) return EXIT_FAILURE;

    *count += 1;
    if(wrap_length > 0) {
        *count = *count % wrap_length;
    }
    
    if(*count == 0) {
        if(fputwc(L'\n', fd_out) == WEOF) return EXIT_FAILURE;
    }
    return EXIT_SUCCESS;
}
453 | 
// Encodes the NUL-terminated byte string src into a newly allocated,
// NUL-terminated wide string stored in *dst. The caller owns *dst and must
// free() it. wrap_length is forwarded to unibinary_encode().
//
// The work is done through two temporary files: src is written to the
// first, unibinary_encode() fills the second, and the second is then read
// back one wide character at a time.
//
// Returns EXIT_FAILURE (with *dst untouched) on any I/O, encoding or
// allocation error.
int unibinary_encode_string(const char *src, wchar_t **dst, size_t wrap_length) {
    
    if(src == NULL || dst == NULL) return EXIT_FAILURE;
    
    // 1. write src into a temporary file
    
    FILE *fd_in = tmpfile();
    if(fd_in == NULL) return EXIT_FAILURE;
    
    size_t src_len = strlen(src);
    size_t written = fwrite(src, 1, src_len, fd_in);
    if(written != src_len) {
        fclose(fd_in);
        return EXIT_FAILURE;
    }
    rewind(fd_in);
    
    // 2. open another temporary file to write the encoded string
    
    FILE *fd_out = tmpfile();
    if(fd_out == NULL) {
        fclose(fd_in);
        return EXIT_FAILURE;
    }
    
    int status = unibinary_encode(fd_in, fd_out, wrap_length);
    fclose(fd_in);
    
    if(status != 0) {
        fclose(fd_out);
        return EXIT_FAILURE;
    }
    
    // 3. read the encoded string and fill *dst
    
    long file_size = ftell(fd_out);
    if(file_size < 0) {
        fclose(fd_out);
        return EXIT_FAILURE;
    }
    
    rewind(fd_out);
    
    // Every wide character takes at least one byte in the file, so file_size
    // is an upper bound on the number of characters; +1 for the terminator.
    size_t max_chars = (size_t)file_size;
    if(max_chars >= ((size_t)-1) / sizeof(wchar_t)) {
        fclose(fd_out);
        return EXIT_FAILURE;
    }
    
    wchar_t *out = (wchar_t *)malloc((max_chars + 1) * sizeof(wchar_t));
    if(out == NULL) {
        fprintf(stderr, "-- malloc error\n");
        fclose(fd_out);
        return EXIT_FAILURE;
    }
    
    // The stream was written with wide-character functions, so it is read
    // back with fgetwc() as well.
    size_t length = 0;
    while(length < max_chars) {
        wint_t wc = fgetwc(fd_out);
        if(wc == WEOF) break;
        out[length++] = (wchar_t)wc;
    }
    
    int read_failed = ferror(fd_out);
    fclose(fd_out);
    
    if(read_failed) {
        free(out);
        return EXIT_FAILURE;
    }
    
    out[length] = L'\0';
    
    *dst = out;
    
    return EXIT_SUCCESS;
}
518 | 
// Decodes the NUL-terminated wide string src into a newly allocated byte
// buffer stored in *dst; *dst_len receives the number of decoded bytes.
// The buffer holds one extra '\0' after the data (not counted in *dst_len)
// so that textual results can be used as a C string. The caller owns *dst
// and must free() it.
//
// The work is done through two temporary files: src is written to the
// first, unibinary_decode() fills the second, which is then read back.
//
// Returns EXIT_FAILURE (with *dst and *dst_len untouched) on any I/O,
// decoding or allocation error.
int unibinary_decode_string(const wchar_t *src, char **dst, long *dst_len) {
    
    if(src == NULL || dst == NULL || dst_len == NULL) return EXIT_FAILURE;
    
    // 1. write src into a temporary file
    
    FILE *fd_in = tmpfile();
    if(fd_in == NULL) return EXIT_FAILURE;
    
    // fputws() returns a non-negative value on success (not necessarily 0)
    int status = fputws(src, fd_in);
    if(status < 0) {
        fclose(fd_in);
        return EXIT_FAILURE;
    }
    
    rewind(fd_in);
    
    // 2. open another temporary file to write decoded data
    
    FILE *fd_out = tmpfile();
    if(fd_out == NULL) {
        fclose(fd_in);
        return EXIT_FAILURE;
    }
    
    int status2 = unibinary_decode(fd_in, fd_out);
    fclose(fd_in);
    
    if(status2 != 0) {
        fclose(fd_out);
        return EXIT_FAILURE;
    }
    
    // 3. read the resulting string and fill **dst
    
    long file_size = ftell(fd_out);
    if(file_size < 0) {
        fclose(fd_out);
        return EXIT_FAILURE;
    }
    
    rewind(fd_out);
    
    // +1 for the terminator; this also avoids a zero-sized allocation
    char *out = (char *)malloc(((size_t)file_size + 1) * sizeof(char));
    if(out == NULL) {
        fprintf(stderr, "-- malloc error\n");
        fclose(fd_out);
        return EXIT_FAILURE;
    }
    
    size_t read = fread(out, sizeof(char), (size_t)file_size, fd_out);
    fclose(fd_out);

    if(read != (size_t)file_size) {
        free(out);
        return EXIT_FAILURE;
    }
    
    out[file_size] = '\0';
    
    *dst = out;
    *dst_len = file_size;
    
    return EXIT_SUCCESS;
}
570 | 
// Encodes the bytes read from fd_in as wide characters written to fd_out.
// When wrap_length > 0 a newline is inserted every wrap_length characters
// (see put_wc()).
//
// Starting at each input position, the first rule that applies is used:
//   - a run of 3 or more identical bytes (at most 0xFFF per run) becomes a
//     U8 code point for the byte followed by a U12b code point for the count
//   - two bytes that are both < 128 become one U12a code point
//   - three bytes become two U12b code points
//   - otherwise a single byte becomes one U8 code point
// Bytes that were read ahead but not consumed are pushed back with ungetc().
//
// Returns EXIT_FAILURE on a read, push-back, conversion or write error.
int unibinary_encode(FILE *fd_in, FILE *fd_out, size_t wrap_length) {
    
    size_t out_count = 0;
    
    while(1) {
        
        unsigned char c0 = 0, c1 = 0, c2 = 0;
        size_t read_c0 = fread(&c0, 1, 1, fd_in);
        if(read_c0 == 0) break;
        
        size_t read_c1 = 0;
        
        // count how often c0 repeats; cc ends up holding the first byte read
        // after the run, if there is one
        long number_of_repeats = 1;
        unsigned char cc = 0;
        size_t read_cc = fread(&cc, 1, 1, fd_in);
        
        while(read_cc != 0 && cc == c0 && number_of_repeats < 0xFFF) {
            number_of_repeats += 1;
            read_cc = fread(&cc, 1, 1, fd_in);
        }
        
        /**/
        
        if(number_of_repeats >= 3) {
            if(read_cc != 0) {
                if(ungetc(cc, fd_in) == EOF) return EXIT_FAILURE;
            }

            wchar_t u0, u1;
            int error = two_unichr_to_repeat_byte_ntimes(c0, (int)number_of_repeats, &u0, &u1);
            if(error) return EXIT_FAILURE;
            
            if(put_wc(fd_out, u0, &out_count, wrap_length) != EXIT_SUCCESS) return EXIT_FAILURE;
            if(put_wc(fd_out, u1, &out_count, wrap_length) != EXIT_SUCCESS) return EXIT_FAILURE;
            
            continue;
        } else if (number_of_repeats == 2) {
            // too short for a run: treat the two equal bytes as c0 and c1
            if(read_cc != 0) {
                if(ungetc(cc, fd_in) == EOF) return EXIT_FAILURE;
            }

            read_c1 = 1;
            c1 = c0;

        } else if (read_cc != 0) {
            read_c1 = 1;
            c1 = cc;
        }
        
        /**/
        
        size_t read_c2 = fread(&c2, 1, 1, fd_in);
        
        // c0 is always available at this point (the loop exits above otherwise)
        int two_ASCII_7bits_chars_available = read_c1 != 0 && c0 < 128 && c1 < 128;
        int three_bytes_available = read_c1 != 0 && read_c2 != 0;
        
        if(two_ASCII_7bits_chars_available) {
            
            // put 2 x 7 bits into a unichar
            wchar_t u0;
            int error = unichr_12a_from_two_ascii(c0, c1, &u0);
            if(error) return EXIT_FAILURE;
            
            if(put_wc(fd_out, u0, &out_count, wrap_length) != EXIT_SUCCESS) return EXIT_FAILURE;
            
            // c2 was not consumed
            if(read_c2 != 0) {
                if(ungetc(c2, fd_in) == EOF) return EXIT_FAILURE;
            }
            
        } else if (three_bytes_available) {
            // read 3 bytes, yield 2 unichars
            wchar_t u0, u1;
            int error = two_twelve_bits_values_from_three_bytes(c0, c1, c2, &u0, &u1);
            if(error) return EXIT_FAILURE;
            
            wchar_t o0, o1;
            error = to_U12(u0, &o0);
            if(error != 0) return EXIT_FAILURE;
            error = to_U12(u1, &o1);
            if(error != 0) return EXIT_FAILURE;
            
            if(put_wc(fd_out, o0, &out_count, wrap_length) != EXIT_SUCCESS) return EXIT_FAILURE;
            if(put_wc(fd_out, o1, &out_count, wrap_length) != EXIT_SUCCESS) return EXIT_FAILURE;
            
        } else {
            // read 1 byte, encode 1 unichar
            wchar_t u0;
            int error = to_U08(c0, &u0);
            if(error) return EXIT_FAILURE;
            
            if(put_wc(fd_out, u0, &out_count, wrap_length) != EXIT_SUCCESS) return EXIT_FAILURE;
            
            // c1 was not consumed (c2 cannot have been read in this branch)
            if(read_c1 != 0) {
                if(ungetc(c1, fd_in) == EOF) return EXIT_FAILURE;
            }
            
        }
        
    }
    
    // a short read caused by an I/O error must not be mistaken for end of input
    if(ferror(fd_in)) return EXIT_FAILURE;
    
    return EXIT_SUCCESS;
}
679 | 


--------------------------------------------------------------------------------
/c/unibinary/unibinary.h:
--------------------------------------------------------------------------------
 1 | //
 2 | //  unibinary.h
 3 | //  unibinary
 4 | //
 5 | //  Created by Nicolas Seriot on 29/12/13.
 6 | //  Copyright (c) 2013 Nicolas Seriot. All rights reserved.
 7 | //
 8 | 
 9 | #include <stdint.h>
10 | #include <wchar.h>
11 | 
12 | #ifndef unibinary_unibinary_h
13 | #define unibinary_unibinary_h
14 | 
15 | // encode
16 | 
17 | int unibinary_encode(FILE *fd_in, FILE *fd_out, size_t wrap_length);
18 | int unibinary_encode_string(const char* src, wchar_t **dst, size_t wrap_length);
19 | 
20 | // decode
21 | 
22 | int unibinary_decode(FILE *src, FILE *dst);
23 | int unibinary_decode_string(const wchar_t *src, char **dst, long *dst_len);
24 | 
25 | #endif
26 | 


--------------------------------------------------------------------------------
/javascript/test/template.css:
--------------------------------------------------------------------------------
 1 | #wru {
 2 |     font-family: sans-serif;
 3 |     font-size: 11pt;
 4 |     border: 1px solid #333;
 5 | }
 6 | #wru div {
 7 |     cursor: default;
 8 |     padding: 0;
 9 |     color: #000;
10 | }
11 | #wru div span,
12 | #wru div strong {
13 |     display: block;
14 |     padding: 4px;
15 |     margin: 0;
16 | }
17 | #wru div ul {
18 |     margin: 0;
19 |     padding-bottom: 4px;
20 | }
21 | #wru div.pass {
22 |     background: #90EE90;
23 | }
24 | #wru div.fail {
25 |     background: #FF6347;
26 | }
27 | #wru div.error {
28 |     background: #000;
29 |     color: #FFF;
30 | }


--------------------------------------------------------------------------------
/javascript/test/test.html:
--------------------------------------------------------------------------------
 1 | <!doctype html>
 2 | <html>
 3 |     <head>
 4 |         <title>wru :: UniBinary.js unit tests</title>
 5 |         <link rel="stylesheet" type="text/css" href="template.css" />
 6 |     </head>
 7 |     <body>
 8 |         <div id="wru"></div>
 9 |         <script src="test.js"></script>
10 | 	<script src="../unibinary.js"></script>
11 |     </body>
12 | </html>


--------------------------------------------------------------------------------
/javascript/test/test.js:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Node, Rhino, JSC, and Browser compatible wru test runner.
  3 |  */
  4 | 
  5 | UNIBINARY_UNIT_TESTING = true;
  6 | 
  7 | var global;
  8 | var Uint8Array;
  9 | var unibinary;
 10 | var wru;
 11 | var console;
 12 | 
 13 | function getGlobal() {
 14 |     return (function () {
 15 |         return this
 16 |     })()
 17 | }
 18 | 
 19 | // node, rhino, or web
 20 | try {
 21 |     // node and phantom js
 22 | 
 23 |     wru = require("./wru.console.js");
 24 |     unibinary = require("../unibinary");
 25 | 
 26 |     go(wru);
 27 | } catch (e) {
 28 |     // rhino & jsc
 29 |     try {
 30 | 
 31 |         global = undefined; // this is a hack to work around a bug in wru & new Rhino versions involving JavaAdapter
 32 | 
 33 |         load("../unibinary.js");
 34 |         load("./wru.console.js");
 35 | 
 36 |         if (typeof Uint8Array == "undefined") {
 37 |             load("typedarray.js");
 38 |             var window = getGlobal();
 39 |             for (var property in exports) {
 40 |                 if (exports.hasOwnProperty(property)) {
 41 |                     try {
 42 |                         window[property] = exports[property]
 43 |                     } catch (e) {
 44 |                         print(e);
 45 |                     }
 46 |                 }
 47 |             }
 48 |         }
 49 | 
 50 |         if (typeof console == "undefined") {
 51 |             console = {
 52 |                 error: function (msg) {
 53 |                     print(msg)
 54 |                 },
 55 |                 log: function (msg) {
 56 |                     print(msg)
 57 |                 }
 58 |             }
 59 |         }
 60 | 
 61 |         go(wru);
 62 |     } catch (e) {
 63 |         // html (assuming test.html is used in same folders structure)
 64 |         (function (xhr) {
 65 |             try {
 66 |                 xhr.open("get", "wru.min.js", true);
 67 |                 xhr.onreadystatechange = function () {
 68 |                     if (xhr.readyState == 4) {
 69 |                         try {
 70 |                             Function(xhr.responseText.replace(/var wru=/, "this.wru=")).call(window);
 71 |                         } catch (e) {
 72 |                             alert(e);
 73 |                         }
 74 |                         go(window.wru);
 75 |                     }
 76 |                 };
 77 |                 xhr.send(null);
 78 |             } catch (e) {
 79 |                 alert(e.message || e);
 80 |             }
 81 |         }(new XMLHttpRequest));
 82 |     }
 83 | }
 84 | 
 85 | 
 86 | function go(wru) {
 87 |     var assert = {
 88 |         equal: function (a, b, m) {
 89 |             wru.assert(a == b);
 90 |         },
 91 |         notEqual: function (a, b, m) {
 92 |             wru.assert(a != b);
 93 |         },
 94 | 
 95 |         ok: function (a, m) {
 96 |             wru.assert(a);
 97 |         }
 98 |     }
 99 | 
100 |     wru.test([
101 |         {
102 |             name: "test_unichr_12_encoding_decoding",
103 |             test: function () {
104 | 
105 |                 var testArray = [0x0, 0x1, 0xAB, 0x123, 0xABC, 0xF, 0xFF, 0xFFF];
106 | 
107 |                 for (var j = 0; j < testArray.length; j++) {
108 |                     i = testArray[j];
109 | 
110 |                     var u = unibinary.unichr_12_from_int(i);
111 |                     assert.notEqual(u, i);
112 | 
113 |                     var i2 = unibinary.int_from_u12b(u);
114 |                     assert.equal(i, i2);
115 |                 }
116 | 
117 |             }},
118 |         {
119 | 
120 |             name: "test_3_to_2_bytes", test: function () {
121 | 
122 |             var ab = unibinary.two_twelve_bits_values_from_three_bytes(0x12, 0x34, 0x56);
123 | 
124 |             assert.equal(ab[0], 0x123, "0x" + ab[0].toString(16));
125 |             assert.equal(ab[1], 0x456, "0x" + ab[1].toString(16));
126 | 
127 |         }},
128 |         {
129 | 
130 |             name: "test_2_to_3_bytes", test: function () {
131 | 
132 |             var abc = unibinary.three_bytes_from_two_twelve_bits_values(0x123, 0x456);
133 | 
134 |             assert.equal(abc[0], 0x12, "0x" + abc[0].toString(16));
135 |             assert.equal(abc[1], 0x34, "0x" + abc[1].toString(16));
136 |             assert.equal(abc[2], 0x56, "0x" + abc[2].toString(16));
137 | 
138 |         }},
139 |         {
140 | 
141 |             name: "test_encode_3_bytes", test: function () {
142 | 
143 |             var bytes = [0xab, 0xcd, 0xef];
144 |             var gen = unibinary.encode(bytes);
145 | 
146 |             assert.ok(typeof gen == "string");
147 |             assert.equal(gen.length, 2);
148 | 
149 |             assert.equal(gen.charCodeAt(0), unibinary.U12b_start + 0xABC);
150 |             assert.equal(gen.charCodeAt(1), unibinary.U12b_start + 0xDEF);
151 | 
152 |         }},
153 |         {
154 | 
155 |             name: "test_encode_bytes", test: function () {
156 |             var bytes = [0xab, 0xcd, 0xef, 0xff];
157 | 
158 |             var gen = unibinary.encode(bytes);
159 | 
160 |             assert.ok(typeof gen == "string");
161 |             assert.equal(gen.length, 3);
162 | 
163 |             assert.equal(gen.charCodeAt(0), unibinary.U12b_start + 0xABC);
164 |             assert.equal(gen.charCodeAt(1), unibinary.U12b_start + 0xDEF);
165 |             assert.equal(gen.charCodeAt(2), unibinary.U8_start + 0xFF);
166 | 
167 |         }},
168 |         {
169 |             name: "test_decode_unichars", test: function () {
170 | 
171 |             var u1 = String.fromCharCode(unibinary.U12b_start + 0xABC);
172 |             var u2 = String.fromCharCode(unibinary.U12b_start + 0xDEF);
173 | 
174 |             var s = u1 + u2;
175 | 
176 |             var gen = unibinary.decode(s);
177 | 
178 |             assert.equal(gen.length, 3);
179 | 
180 |             assert.equal(gen[0], 0xAB);
181 |             assert.equal(gen[1], 0xCD);
182 |             assert.equal(gen[2], 0xEF);
183 | 
184 |         }},
185 |         {
186 | 
187 |             name: "test_is_in_U8b", test: function () {
188 |             assert.ok(!unibinary.is_in_U8b(String.fromCharCode(0x03FF)));
189 |             assert.ok(unibinary.is_in_U8b(String.fromCharCode(0x400)));
190 |             assert.ok(unibinary.is_in_U8b(String.fromCharCode(0x4FF)));
191 |             assert.ok(!unibinary.is_in_U8b(String.fromCharCode(0x500)));
192 |         }},
193 |         {
194 | 
195 |             name: "test_unichr_12a_from_two_ascii", test: function () {
196 |             var u = unibinary.unichr_12a_from_two_ascii('Z'.charCodeAt(0), 'E'.charCodeAt(0));
197 |             assert.equal(u, String.fromCharCode(0x9485));
198 | 
199 |             var u = unibinary.unichr_12a_from_two_ascii('z'.charCodeAt(0), ','.charCodeAt(0));
200 |             assert.equal(u, String.fromCharCode(0x8CAC));
201 | 
202 |         }},
203 |         {
204 | 
205 |             name: "test_ascii_characters_encoding", test: function () {
206 |             var s = "abc";
207 | 
208 |             var gen = unibinary.encodeString(s);
209 | 
210 |             assert.ok(typeof gen == "string");
211 |             assert.equal(gen.length, 2);
212 | 
213 |             assert.equal(gen.charCodeAt(0), 0x9662);
214 |             assert.equal(gen.charCodeAt(1), 0x0463);
215 | 
216 |         }},
217 |         {
218 | 
219 |             name: "test_ascii_characters_encoding_2", test: function () {
220 |             var s = "ZE";
221 | 
222 |             var gen = unibinary.encodeString(s);
223 | 
224 |             assert.ok(typeof gen == "string");
225 |             assert.equal(gen.length, 1);
226 | 
227 |             assert.equal(gen[0], unibinary.unichr_12a_from_two_ascii('Z'.charCodeAt(0), 'E'.charCodeAt(0)));
228 | 
229 |         }},
230 |         {
231 | 
232 |             name: "test_two_unichr_to_repeat_byte_ntimes_aaa", test: function () {
233 | 
234 |             var gen = unibinary.two_unichr_to_repeat_byte_ntimes('a'.charCodeAt(0), 10);
235 |             assert.ok(typeof gen == "string");
236 |             assert.equal(gen.length, 2);
237 | 
238 |             assert.equal(gen.charCodeAt(0), 0x0461);
239 |             assert.equal(gen.charCodeAt(1), 0x4E0A);
240 | 
241 |         }},
242 |         {
243 | 
244 |             name: "test_two_unichr_to_repeat_byte_ntimes_xxx", test: function () {
245 | 
246 |             var gen = unibinary.two_unichr_to_repeat_byte_ntimes('x'.charCodeAt(0), 3);
247 | 
248 |             assert.ok(typeof gen == "string");
249 |             assert.equal(gen.length, 2);
250 | 
251 |             assert.equal(gen.charCodeAt(0), 0x0478);
252 |             assert.equal(gen.charCodeAt(1), 0x4E03);
253 | 
254 |         }},
255 |         {
256 | 
257 |             name: "test_repeat", test: function () {
258 | 
259 |             var s = "xxx";
260 | 
261 |             var gen = unibinary.encodeString(s);
262 | 
263 |             assert.ok(typeof gen == "string");
264 |             assert.equal(gen.length, 2);
265 | 
266 |             assert.equal(gen.charCodeAt(0), 0x0478);
267 |             assert.equal(gen.charCodeAt(1), 0x4E03);
268 | 
269 |         }},
270 |         {
271 | 
272 |             name: "test_ascii_characters_decoding", test: function () {
273 |             var s = String.fromCharCode(0x9662) + String.fromCharCode(0x0463);
274 | 
275 |             var s2 = unibinary.decode(s);
276 | 
277 |             assert.equal(s2[0], 'a'.charCodeAt(0));
278 |             assert.equal(s2[1], 'b'.charCodeAt(0));
279 |             assert.equal(s2[2], 'c'.charCodeAt(0));
280 | 
281 | 
282 |         }},
283 |         {
284 | 
285 |             name: "test_ascii_characters_decoding_2", test: function () {
286 |             var s = String.fromCharCode(0x9485);
287 | 
288 |             var s2 = unibinary.decode(s);
289 | 
290 |             assert.equal(s2[0], 'Z'.charCodeAt(0));
291 |             assert.equal(s2[1], 'E'.charCodeAt(0));
292 | 
293 | 
294 |         }},
295 |         {
296 | 
297 |             name: "test_five_bytes_encoding", test: function () {
298 |             var bytes = [0xab, 0xcd, 0xef, 0xab, 0xcd];
299 | 
300 |             var gen = unibinary.encode(bytes);
301 | 
302 |             assert.equal(gen.length, 4);
303 | 
304 |             assert.equal(gen.charCodeAt(0), unibinary.U12b_start + 0xABC);
305 |             assert.equal(gen.charCodeAt(1), unibinary.U12b_start + 0xDEF);
306 |             assert.equal(gen.charCodeAt(2), unibinary.U8_start + 0xAB);
307 |             assert.equal(gen.charCodeAt(3), unibinary.U8_start + 0xCD);
308 | 
309 |         }},
310 |         {
311 | 
312 |             name: "test_ascii_and_bytes_encoding", test: function () {
313 |             var bytes = [0xab, 0xcd, 0xef];
314 |             bytes = bytes.concat([0x61, 0x62, 0x63, 0x64, 0x65]); //abcde
315 | 
316 |             var gen = unibinary.encode(bytes);
317 | 
318 |             assert.equal(gen.length, 5);
319 | 
320 |             assert.equal(gen.charCodeAt(0), unibinary.U12b_start + 0xABC);
321 |             assert.equal(gen.charCodeAt(1), unibinary.U12b_start + 0xDEF);
322 |             assert.equal(gen[2], unibinary.unichr_12a_from_two_ascii('a'.charCodeAt(0), 'b'.charCodeAt(0)));
323 |             assert.equal(gen[3], unibinary.unichr_12a_from_two_ascii('c'.charCodeAt(0), 'd'.charCodeAt(0)));
324 |             assert.equal(gen[4], unibinary.unichr_08_from_int('e'.charCodeAt(0)));
325 | 
326 | 
327 |         }},
328 |         {
329 | 
330 |             name: "test_ascii_and_bytes_decoding", test: function () {
331 |             var s = String.fromCharCode(unibinary.U12b_start + 0xABC);
332 |             s += String.fromCharCode(unibinary.U12b_start + 0xDEF);
333 |             s += unibinary.unichr_12a_from_two_ascii('a'.charCodeAt(0), 'b'.charCodeAt(0));
334 |             s += unibinary.unichr_12a_from_two_ascii('c'.charCodeAt(0), 'd'.charCodeAt(0));
335 |             s += unibinary.unichr_08_from_int('e'.charCodeAt(0));
336 | 
337 |             var gen = unibinary.decode(s);
338 | 
339 |             assert.equal(gen.length, 8);
340 | 
341 |             assert.equal(gen[0], 0xAB);
342 |             assert.equal(gen[1], 0xCD);
343 |             assert.equal(gen[2], 0xEF);
344 |             assert.equal(gen[3], 0x61);
345 |             assert.equal(gen[4], 0x62);
346 |             assert.equal(gen[5], 0x63);
347 |             assert.equal(gen[6], 0x64);
348 |             assert.equal(gen[7], 0x65);
349 | 
350 | 
351 |         }},
352 |         {
353 | 
354 |             name: "test_repeats", test: function () {
355 |             var l = [1, 1, 1, 2, 1];
356 | 
357 |             var n = unibinary.number_of_left_instances_from_index(l, 0);
358 | 
359 |             assert.equal(n, 3);
360 | 
361 |         }},
362 |         {
363 | 
364 |             name: "test_empty_string", test: function () {
365 |             var bytes = "";
366 | 
367 |             var gen = unibinary.encodeString(bytes);
368 | 
369 |             assert.equal(gen, "");
370 | 
371 |         }},
372 |         {
373 | 
374 |             name: "test_one_char", test: function () {
375 |             var bytes = "a";
376 | 
377 |             var gen = unibinary.encodeString(bytes);
378 | 
379 |             assert.ok(typeof gen == "string");
380 |             assert.equal(gen.length, 1);
381 | 
382 | 
383 |             assert.equal(gen.charCodeAt(0), 0x0461);
384 | 
385 |         }},
386 |         {
387 | 
388 |             name: "test_repeats_2", test: function () {
389 | 
390 |             var bytes = [0xAB, 0xCD, 0xEF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00];
391 | 
392 |             var gen = unibinary.encode(bytes);
393 | 
394 |             assert.equal(gen.length, 5);
395 | 
396 |             assert.equal(gen.charCodeAt(0), 0x58BC);
397 |             assert.equal(gen.charCodeAt(1), 0x5BEF);
398 |             assert.equal(gen.charCodeAt(2), 0x04FF);
399 |             assert.equal(gen.charCodeAt(3), 0x4E04);
400 |             assert.equal(gen.charCodeAt(4), 0x0400);
401 | 
402 |         }},
403 |         {
404 | 
405 |             name: "test_encode_macho_header", test: function () {
406 | 
407 |             var bytes = [0xCF, 0xFA, 0xED, 0xFE, 0x07, 0x00, 0x00, 0x01];
408 | 
409 |             var gen = unibinary.encode(bytes);
410 | 
411 |             assert.equal(gen.length, 5);
412 | 
413 |             assert.equal(gen.charCodeAt(0), 0x5AFF);
414 |             assert.equal(gen.charCodeAt(1), 0x58ED);
415 |             assert.equal(gen.charCodeAt(2), 0x5DE0);
416 |             assert.equal(gen.charCodeAt(3), 0x5500);
417 |             assert.equal(gen.charCodeAt(4), 0x5E01);
418 | 
419 |         }},
420 |         {
421 | 
422 |             name: "test_big_repeats_2000_minus_2", test: function () {
423 | 
424 |             var bytes = [];
425 |             for (var i = 0; i < 0x2000 - 2; i++) bytes.push(0xAA);
426 | 
427 |             var gen = unibinary.encode(bytes);
428 | 
429 |             assert.equal(gen.length, 4);
430 | 
431 |             assert.equal(gen.charCodeAt(0), 0x04AA);
432 |             assert.equal(gen.charCodeAt(1), 0x5DFF);
433 |             assert.equal(gen.charCodeAt(2), 0x04AA);
434 |             assert.equal(gen.charCodeAt(3), 0x5DFF);
435 | 
436 |         }},
437 |         {
438 | 
439 |             name: "test_big_repeats_2000", test: function () {
440 | 
441 |             var bytes = [];
442 |             for (var i = 0; i < 0x2000; i++) bytes.push(0xAA);
443 | 
444 |             var gen = unibinary.encode(bytes);
445 |             assert.equal(gen.length, 6);
446 | 
447 |             assert.equal(gen.charCodeAt(0), 0x04AA);
448 |             assert.equal(gen.charCodeAt(1), 0x5DFF);
449 |             assert.equal(gen.charCodeAt(2), 0x04AA);
450 |             assert.equal(gen.charCodeAt(3), 0x5DFF);
451 |             assert.equal(gen.charCodeAt(4), 0x04AA);
452 |             assert.equal(gen.charCodeAt(5), 0x04AA);
453 | 
454 |         }},
455 |         {
456 | 
457 |             name: "test_ascii_text_encoding_decoding", test: function () {
458 | 
459 |             var s = "if I'd listened everything that they said to me, took the time to bleed from all the tiny little arrows shot my way, I wouldn't be here! the ones who don't do anything are always the ones who try to put you down. I'm talking to you: hero time starts right now! time to shine!";
460 | 
461 |             var encodeGen = unibinary.encodeString(s);
462 | 
463 |             var decodeGen = unibinary.decodeString(encodeGen);
464 | 
465 |             assert.equal(s, decodeGen);
466 | 
467 |         }},
468 |         {
469 | 
470 |             name: "test_ascii_text_encoding_decoding_2", test: function () {
471 | 
472 |             var s = "";
473 |             for (var i = 32; i <= 128; i++) s += String.fromCharCode(i);
474 | 
475 |             var encodeGen = unibinary.encodeString(s);
476 | 
477 |             var decodeGen = unibinary.decodeString(encodeGen);
478 | 
479 |             assert.equal(s, decodeGen);
480 | 
481 |         }}
482 |     ]);
483 | }
484 | 


--------------------------------------------------------------------------------
/javascript/test/typedarray.js:
--------------------------------------------------------------------------------
  1 | /* type array polyfill from https://github.com/substack/typedarray */
  2 | 
  3 | var exports = {};
  4 | // Beyond this value, index getters/setters (i.e. array[0], array[1]) are so slow to
  5 | // create, and consume so much memory, that the browser appears frozen.
  6 | var MAX_ARRAY_LENGTH = 1e5;
  7 | 
  8 | // Approximations of internal ECMAScript conversion functions
  9 | var ECMAScript = (function() {
 10 |   // Stash a copy in case other scripts modify these
 11 |   var opts = Object.prototype.toString,
 12 |       ophop = Object.prototype.hasOwnProperty;
 13 | 
 14 |   return {
 15 |     // Class returns internal [[Class]] property, used to avoid cross-frame instanceof issues:
 16 |     Class: function(v) { return opts.call(v).replace(/^\[object *|\]$/g, ''); },
 17 |     HasProperty: function(o, p) { return p in o; },
 18 |     HasOwnProperty: function(o, p) { return ophop.call(o, p); },
 19 |     IsCallable: function(o) { return typeof o === 'function'; },
 20 |     ToInt32: function(v) { return v >> 0; },
 21 |     ToUint32: function(v) { return v >>> 0; }
 22 |   };
 23 | }());
 24 | 
 25 | // Snapshot intrinsics
 26 | var LN2 = Math.LN2,
 27 |     abs = Math.abs,
 28 |     floor = Math.floor,
 29 |     log = Math.log,
 30 |     min = Math.min,
 31 |     pow = Math.pow,
 32 |     round = Math.round;
 33 | 
 34 | // ES5: lock down object properties
 35 | function configureProperties(obj) {
 36 |   if (getOwnPropNames && defineProp) {
 37 |     var props = getOwnPropNames(obj), i;
 38 |     for (i = 0; i < props.length; i += 1) {
 39 |       defineProp(obj, props[i], {
 40 |         value: obj[props[i]],
 41 |         writable: false,
 42 |         enumerable: false,
 43 |         configurable: false
 44 |       });
 45 |     }
 46 |   }
 47 | }
 48 | 
 49 | // emulate ES5 getter/setter API using legacy APIs
 50 | // http://blogs.msdn.com/b/ie/archive/2010/09/07/transitioning-existing-code-to-the-es5-getter-setter-apis.aspx
 51 | // (second clause tests for Object.defineProperty() in IE<9 that only supports extending DOM prototypes, but
 52 | // note that IE<9 does not support __defineGetter__ or __defineSetter__ so it just renders the method harmless)
 53 | var defineProp
 54 | if (Object.defineProperty && (function() {
 55 |       try {
 56 |         Object.defineProperty({}, 'x', {});
 57 |         return true;
 58 |       } catch (e) {
 59 |         return false;
 60 |       }
 61 |     })()) {
 62 |   defineProp = Object.defineProperty;
 63 | } else {
 64 |   defineProp = function(o, p, desc) {
 65 |     if (!o === Object(o)) throw new TypeError("Object.defineProperty called on non-object");
 66 |     if (ECMAScript.HasProperty(desc, 'get') && Object.prototype.__defineGetter__) { Object.prototype.__defineGetter__.call(o, p, desc.get); }
 67 |     if (ECMAScript.HasProperty(desc, 'set') && Object.prototype.__defineSetter__) { Object.prototype.__defineSetter__.call(o, p, desc.set); }
 68 |     if (ECMAScript.HasProperty(desc, 'value')) { o[p] = desc.value; }
 69 |     return o;
 70 |   };
 71 | }
 72 | 
 73 | var getOwnPropNames = Object.getOwnPropertyNames || function (o) {
 74 |   if (o !== Object(o)) throw new TypeError("Object.getOwnPropertyNames called on non-object");
 75 |   var props = [], p;
 76 |   for (p in o) {
 77 |     if (ECMAScript.HasOwnProperty(o, p)) {
 78 |       props.push(p);
 79 |     }
 80 |   }
 81 |   return props;
 82 | };
 83 | 
 84 | // ES5: Make obj[index] an alias for obj._getter(index)/obj._setter(index, value)
 85 | // for index in 0 ... obj.length
 86 | function makeArrayAccessors(obj) {
 87 |   if (!defineProp) { return; }
 88 | 
 89 |   if (obj.length > MAX_ARRAY_LENGTH) throw new RangeError("Array too large for polyfill");
 90 | 
 91 |   function makeArrayAccessor(index) {
 92 |     defineProp(obj, index, {
 93 |       'get': function() { return obj._getter(index); },
 94 |       'set': function(v) { obj._setter(index, v); },
 95 |       enumerable: true,
 96 |       configurable: false
 97 |     });
 98 |   }
 99 | 
100 |   var i;
101 |   for (i = 0; i < obj.length; i += 1) {
102 |     makeArrayAccessor(i);
103 |   }
104 | }
105 | 
106 | // Internal conversion functions:
107 | //    pack<Type>()   - take a number (interpreted as Type), output a byte array
108 | //    unpack<Type>() - take a byte array, output a Type-like number
109 | 
// Reinterpret the low `bits` bits of value as a two's-complement signed integer.
function as_signed(value, bits) {
  var unused = 32 - bits;
  var shifted = value << unused;
  return shifted >> unused;
}

// Reinterpret the low `bits` bits of value as an unsigned integer.
function as_unsigned(value, bits) {
  var unused = 32 - bits;
  var shifted = value << unused;
  return shifted >>> unused;
}
112 | 
// 8-bit: a single byte
function packI8(n) {
  var only = n & 0xff;
  return [only];
}
function unpackI8(bytes) {
  return as_signed(bytes[0], 8);
}

function packU8(n) {
  var only = n & 0xff;
  return [only];
}
function unpackU8(bytes) {
  return as_unsigned(bytes[0], 8);
}

// Rounds first, then saturates to the 0..255 range
function packU8Clamped(n) {
  n = round(Number(n));
  if (n < 0) { return [0]; }
  if (n > 0xff) { return [0xff]; }
  return [n & 0xff];
}

// 16-bit: most significant byte first
function packI16(n) {
  var high = (n >> 8) & 0xff,
      low = n & 0xff;
  return [high, low];
}
function unpackI16(bytes) {
  var word = bytes[0] << 8 | bytes[1];
  return as_signed(word, 16);
}

function packU16(n) {
  var high = (n >> 8) & 0xff,
      low = n & 0xff;
  return [high, low];
}
function unpackU16(bytes) {
  var word = bytes[0] << 8 | bytes[1];
  return as_unsigned(word, 16);
}

// 32-bit: most significant byte first
function packI32(n) {
  var out = [], shift;
  for (shift = 24; shift >= 0; shift -= 8) {
    out.push((n >> shift) & 0xff);
  }
  return out;
}
function unpackI32(bytes) {
  var dword = bytes[0] << 24 | bytes[1] << 16 | bytes[2] << 8 | bytes[3];
  return as_signed(dword, 32);
}

function packU32(n) {
  var out = [], shift;
  for (shift = 24; shift >= 0; shift -= 8) {
    out.push((n >> shift) & 0xff);
  }
  return out;
}
function unpackU32(bytes) {
  var dword = bytes[0] << 24 | bytes[1] << 16 | bytes[2] << 8 | bytes[3];
  return as_unsigned(dword, 32);
}
132 | 
// Encode the number v as an IEEE 754 binary value with `ebits` exponent bits
// and `fbits` fraction bits (8/23 for float32, 11/52 for float64).
// Returns the encoding as an array of byte values, most significant byte first.
function packIEEE754(v, ebits, fbits) {

  var bias = (1 << (ebits - 1)) - 1,
      s, e, f,
      i, bits, str, bytes,
      significand, scale;

  // Round to nearest integer, ties to the even neighbour
  function roundToEven(n) {
    var w = floor(n), f = n - w;
    if (f < 0.5)
      return w;
    if (f > 0.5)
      return w + 1;
    return w % 2 ? w + 1 : w;
  }

  // Compute sign, exponent, fraction
  if (v !== v) {
    // NaN
    // http://dev.w3.org/2006/webapi/WebIDL/#es-type-mapping
    e = (1 << ebits) - 1; f = pow(2, fbits - 1); s = 0;
  } else if (v === Infinity || v === -Infinity) {
    e = (1 << ebits) - 1; f = 0; s = (v < 0) ? 1 : 0;
  } else if (v === 0) {
    // 1 / -0 is -Infinity: the only way to tell the two zeros apart
    e = 0; f = 0; s = (1 / v === -Infinity) ? 1 : 0;
  } else {
    s = v < 0;
    v = abs(v);

    if (v >= pow(2, 1 - bias)) {
      e = min(floor(log(v) / LN2), 1023);

      // log() is inexact, so the estimated exponent can be off by one:
      // bring the significand back into [1, 2)
      significand = v / pow(2, e);
      if (significand < 1) { e -= 1; significand *= 2; }
      if (significand >= 2) { e += 1; significand /= 2; }

      scale = pow(2, fbits);
      f = roundToEven(significand * scale);
      if (f >= 2 * scale) {
        // rounding carried into the next binade: significand becomes exactly 1.0
        e = e + 1;
        f = scale;
      }
      if (e > bias) {
        // Overflow
        e = (1 << ebits) - 1;
        f = 0;
      } else {
        // Normalized (drop the implicit leading 1)
        e = e + bias;
        f = f - scale;
      }
    } else {
      // Denormalized
      e = 0;
      f = roundToEven(v / pow(2, 1 - bias - fbits));
      if (f >= pow(2, fbits)) {
        // rounded up to the smallest normalized value
        e = 1;
        f = 0;
      }
    }
  }

  // Pack sign, exponent, fraction
  bits = [];
  for (i = fbits; i; i -= 1) { bits.push(f % 2 ? 1 : 0); f = floor(f / 2); }
  for (i = ebits; i; i -= 1) { bits.push(e % 2 ? 1 : 0); e = floor(e / 2); }
  bits.push(s ? 1 : 0);
  bits.reverse();
  str = bits.join('');

  // Bits to bytes
  bytes = [];
  while (str.length) {
    bytes.push(parseInt(str.substring(0, 8), 2));
    str = str.substring(8);
  }
  return bytes;
}
200 | 
// Decode an IEEE 754 binary value stored in `bytes` (most significant byte
// first) using `ebits` exponent bits and `fbits` fraction bits.
function unpackIEEE754(bytes, ebits, fbits) {

  // Bytes to one long bit string, most significant bit first
  var bitString = '', index, octet, octetBits, k;
  for (index = 0; index < bytes.length; index += 1) {
    octet = bytes[index];
    octetBits = '';
    for (k = 0; k < 8; k += 1) {
      // peel bits off the low end, so each one goes in front of the earlier ones
      octetBits = (octet % 2 ? '1' : '0') + octetBits;
      octet = octet >> 1;
    }
    bitString += octetBits;
  }

  // Unpack sign, exponent, fraction
  var bias = (1 << (ebits - 1)) - 1;
  var sign = parseInt(bitString.substring(0, 1), 2) ? -1 : 1;
  var exponent = parseInt(bitString.substring(1, 1 + ebits), 2);
  var fraction = parseInt(bitString.substring(1 + ebits), 2);

  // Produce number
  if (exponent === (1 << ebits) - 1) {
    // all-ones exponent: NaN or +/-Infinity
    return fraction !== 0 ? NaN : sign * Infinity;
  }
  if (exponent > 0) {
    // Normalized
    return sign * pow(2, exponent - bias) * (1 + fraction / pow(2, fbits));
  }
  if (fraction !== 0) {
    // Denormalized
    return sign * pow(2, -(bias - 1)) * (fraction / pow(2, fbits));
  }
  // Signed zero
  return sign < 0 ? -0 : 0;
}
235 | 
// float64: 11 exponent bits, 52 fraction bits
function unpackF64(b) {
  return unpackIEEE754(b, 11, 52);
}
function packF64(v) {
  return packIEEE754(v, 11, 52);
}

// float32: 8 exponent bits, 23 fraction bits
function unpackF32(b) {
  return unpackIEEE754(b, 8, 23);
}
function packF32(v) {
  return packIEEE754(v, 8, 23);
}
240 | 
241 | 
242 | //
243 | // 3 The ArrayBuffer Type
244 | //
245 | 
246 | (function() {
247 | 
248 |   /** @constructor */
249 |   var ArrayBuffer = function ArrayBuffer(length) {
250 |     length = ECMAScript.ToInt32(length);
251 |     if (length < 0) throw new RangeError('ArrayBuffer size is not a small enough positive integer');
252 | 
253 |     this.byteLength = length;
254 |     this._bytes = [];
255 |     this._bytes.length = length;
256 | 
257 |     var i;
258 |     for (i = 0; i < this.byteLength; i += 1) {
259 |       this._bytes[i] = 0;
260 |     }
261 | 
262 |     configureProperties(this);
263 |   };
264 |   
265 |   //
266 |   // 4 The ArrayBufferView Type
267 |   //
268 | 
  // NOTE: this constructor is not exported
  // Intentionally empty: the three assignments below are commented out, so
  // instances start without buffer/byteOffset/byteLength of their own.
  /** @constructor */
  var ArrayBufferView = function ArrayBufferView() {
    //this.buffer = null;
    //this.byteOffset = 0;
    //this.byteLength = 0;
  };
276 | 
277 |   //
278 |   // 5 The Typed Array View Types
279 |   //
280 | 
281 |   function makeConstructor(bytesPerElement, pack, unpack) {
282 |     // Each TypedArray type requires a distinct constructor instance with
283 |     // identical logic, which this produces.
284 | 
285 |     var ctor;
286 |     ctor = function(buffer, byteOffset, length) {
287 |       var array, sequence, i, s;
288 | 
289 |       if (!arguments.length || typeof arguments[0] === 'number') {
290 |         // Constructor(unsigned long length)
291 |         this.length = ECMAScript.ToInt32(arguments[0]);
292 |         if (length < 0) throw new RangeError('ArrayBufferView size is not a small enough positive integer');
293 | 
294 |         this.byteLength = this.length * this.BYTES_PER_ELEMENT;
295 |         this.buffer = new ArrayBuffer(this.byteLength);
296 |         this.byteOffset = 0;
297 |       } else if (typeof arguments[0] === 'object' && arguments[0].constructor === ctor) {
298 |         // Constructor(TypedArray array)
299 |         array = arguments[0];
300 | 
301 |         this.length = array.length;
302 |         this.byteLength = this.length * this.BYTES_PER_ELEMENT;
303 |         this.buffer = new ArrayBuffer(this.byteLength);
304 |         this.byteOffset = 0;
305 | 
306 |         for (i = 0; i < this.length; i += 1) {
307 |           this._setter(i, array._getter(i));
308 |         }
309 |       } else if (typeof arguments[0] === 'object' &&
310 |                  !(arguments[0] instanceof ArrayBuffer || ECMAScript.Class(arguments[0]) === 'ArrayBuffer')) {
311 |         // Constructor(sequence<type> array)
312 |         sequence = arguments[0];
313 | 
314 |         this.length = ECMAScript.ToUint32(sequence.length);
315 |         this.byteLength = this.length * this.BYTES_PER_ELEMENT;
316 |         this.buffer = new ArrayBuffer(this.byteLength);
317 |         this.byteOffset = 0;
318 | 
319 |         for (i = 0; i < this.length; i += 1) {
320 |           s = sequence[i];
321 |           this._setter(i, Number(s));
322 |         }
323 |       } else if (typeof arguments[0] === 'object' &&
324 |                  (arguments[0] instanceof ArrayBuffer || ECMAScript.Class(arguments[0]) === 'ArrayBuffer')) {
325 |         // Constructor(ArrayBuffer buffer,
326 |         //             optional unsigned long byteOffset, optional unsigned long length)
327 |         this.buffer = buffer;
328 | 
329 |         this.byteOffset = ECMAScript.ToUint32(byteOffset);
330 |         if (this.byteOffset > this.buffer.byteLength) {
331 |           throw new RangeError("byteOffset out of range");
332 |         }
333 | 
334 |         if (this.byteOffset % this.BYTES_PER_ELEMENT) {
335 |           // The given byteOffset must be a multiple of the element
336 |           // size of the specific type, otherwise an exception is raised.
337 |           throw new RangeError("ArrayBuffer length minus the byteOffset is not a multiple of the element size.");
338 |         }
339 | 
340 |         if (arguments.length < 3) {
341 |           this.byteLength = this.buffer.byteLength - this.byteOffset;
342 | 
343 |           if (this.byteLength % this.BYTES_PER_ELEMENT) {
344 |             throw new RangeError("length of buffer minus byteOffset not a multiple of the element size");
345 |           }
346 |           this.length = this.byteLength / this.BYTES_PER_ELEMENT;
347 |         } else {
348 |           this.length = ECMAScript.ToUint32(length);
349 |           this.byteLength = this.length * this.BYTES_PER_ELEMENT;
350 |         }
351 | 
352 |         if ((this.byteOffset + this.byteLength) > this.buffer.byteLength) {
353 |           throw new RangeError("byteOffset and length reference an area beyond the end of the buffer");
354 |         }
355 |       } else {
356 |         throw new TypeError("Unexpected argument type(s)");
357 |       }
358 | 
359 |       this.constructor = ctor;
360 | 
361 |       configureProperties(this);
362 |       makeArrayAccessors(this);
363 |     };
364 | 
365 |     ctor.prototype = new ArrayBufferView();
366 |     ctor.prototype.BYTES_PER_ELEMENT = bytesPerElement;
367 |     ctor.prototype._pack = pack;
368 |     ctor.prototype._unpack = unpack;
369 |     ctor.BYTES_PER_ELEMENT = bytesPerElement;
370 | 
371 |     // getter type (unsigned long index);
372 |     ctor.prototype._getter = function(index) {
373 |       if (arguments.length < 1) throw new SyntaxError("Not enough arguments");
374 | 
375 |       index = ECMAScript.ToUint32(index);
376 |       if (index >= this.length) {
377 |         return undefined;
378 |       }
379 | 
380 |       var bytes = [], i, o;
381 |       for (i = 0, o = this.byteOffset + index * this.BYTES_PER_ELEMENT;
382 |            i < this.BYTES_PER_ELEMENT;
383 |            i += 1, o += 1) {
384 |         bytes.push(this.buffer._bytes[o]);
385 |       }
386 |       return this._unpack(bytes);
387 |     };
388 | 
389 |     // NONSTANDARD: convenience alias for getter: type get(unsigned long index);
390 |     ctor.prototype.get = ctor.prototype._getter;
391 | 
392 |     // setter void (unsigned long index, type value);
393 |     ctor.prototype._setter = function(index, value) {
394 |       if (arguments.length < 2) throw new SyntaxError("Not enough arguments");
395 | 
396 |       index = ECMAScript.ToUint32(index);
397 |       if (index >= this.length) {
398 |         return undefined;
399 |       }
400 | 
401 |       var bytes = this._pack(value), i, o;
402 |       for (i = 0, o = this.byteOffset + index * this.BYTES_PER_ELEMENT;
403 |            i < this.BYTES_PER_ELEMENT;
404 |            i += 1, o += 1) {
405 |         this.buffer._bytes[o] = bytes[i];
406 |       }
407 |     };
408 | 
409 |     // void set(TypedArray array, optional unsigned long offset);
410 |     // void set(sequence<type> array, optional unsigned long offset);
411 |     ctor.prototype.set = function(index, value) {
412 |       if (arguments.length < 1) throw new SyntaxError("Not enough arguments");
413 |       var array, sequence, offset, len,
414 |           i, s, d,
415 |           byteOffset, byteLength, tmp;
416 | 
417 |       if (typeof arguments[0] === 'object' && arguments[0].constructor === this.constructor) {
418 |         // void set(TypedArray array, optional unsigned long offset);
419 |         array = arguments[0];
420 |         offset = ECMAScript.ToUint32(arguments[1]);
421 | 
422 |         if (offset + array.length > this.length) {
423 |           throw new RangeError("Offset plus length of array is out of range");
424 |         }
425 | 
426 |         byteOffset = this.byteOffset + offset * this.BYTES_PER_ELEMENT;
427 |         byteLength = array.length * this.BYTES_PER_ELEMENT;
428 | 
429 |         if (array.buffer === this.buffer) {
430 |           tmp = [];
431 |           for (i = 0, s = array.byteOffset; i < byteLength; i += 1, s += 1) {
432 |             tmp[i] = array.buffer._bytes[s];
433 |           }
434 |           for (i = 0, d = byteOffset; i < byteLength; i += 1, d += 1) {
435 |             this.buffer._bytes[d] = tmp[i];
436 |           }
437 |         } else {
438 |           for (i = 0, s = array.byteOffset, d = byteOffset;
439 |                i < byteLength; i += 1, s += 1, d += 1) {
440 |             this.buffer._bytes[d] = array.buffer._bytes[s];
441 |           }
442 |         }
443 |       } else if (typeof arguments[0] === 'object' && typeof arguments[0].length !== 'undefined') {
444 |         // void set(sequence<type> array, optional unsigned long offset);
445 |         sequence = arguments[0];
446 |         len = ECMAScript.ToUint32(sequence.length);
447 |         offset = ECMAScript.ToUint32(arguments[1]);
448 | 
449 |         if (offset + len > this.length) {
450 |           throw new RangeError("Offset plus length of array is out of range");
451 |         }
452 | 
453 |         for (i = 0; i < len; i += 1) {
454 |           s = sequence[i];
455 |           this._setter(offset + i, Number(s));
456 |         }
457 |       } else {
458 |         throw new TypeError("Unexpected argument type(s)");
459 |       }
460 |     };
461 | 
462 |     // TypedArray subarray(long begin, optional long end);
463 |     ctor.prototype.subarray = function(start, end) {
464 |       function clamp(v, min, max) { return v < min ? min : v > max ? max : v; }
465 | 
466 |       start = ECMAScript.ToInt32(start);
467 |       end = ECMAScript.ToInt32(end);
468 | 
469 |       if (arguments.length < 1) { start = 0; }
470 |       if (arguments.length < 2) { end = this.length; }
471 | 
472 |       if (start < 0) { start = this.length + start; }
473 |       if (end < 0) { end = this.length + end; }
474 | 
475 |       start = clamp(start, 0, this.length);
476 |       end = clamp(end, 0, this.length);
477 | 
478 |       var len = end - start;
479 |       if (len < 0) {
480 |         len = 0;
481 |       }
482 | 
483 |       return new this.constructor(
484 |         this.buffer, this.byteOffset + start * this.BYTES_PER_ELEMENT, len);
485 |     };
486 | 
487 |     return ctor;
488 |   }
489 | 
490 |   var Int8Array = makeConstructor(1, packI8, unpackI8);
491 |   var Uint8Array = makeConstructor(1, packU8, unpackU8);
492 |   var Uint8ClampedArray = makeConstructor(1, packU8Clamped, unpackU8);
493 |   var Int16Array = makeConstructor(2, packI16, unpackI16);
494 |   var Uint16Array = makeConstructor(2, packU16, unpackU16);
495 |   var Int32Array = makeConstructor(4, packI32, unpackI32);
496 |   var Uint32Array = makeConstructor(4, packU32, unpackU32);
497 |   var Float32Array = makeConstructor(4, packF32, unpackF32);
498 |   var Float64Array = makeConstructor(8, packF64, unpackF64);
499 | 
500 |   exports.Int8Array = exports.Int8Array || Int8Array;
501 |   exports.Uint8Array = exports.Uint8Array || Uint8Array;
502 |   exports.Uint8ClampedArray = exports.Uint8ClampedArray || Uint8ClampedArray;
503 |   exports.Int16Array = exports.Int16Array || Int16Array;
504 |   exports.Uint16Array = exports.Uint16Array || Uint16Array;
505 |   exports.Int32Array = exports.Int32Array || Int32Array;
506 |   exports.Uint32Array = exports.Uint32Array || Uint32Array;
507 |   exports.Float32Array = exports.Float32Array || Float32Array;
508 |   exports.Float64Array = exports.Float64Array || Float64Array;
509 | }());
510 | 
511 | //
512 | // 6 The DataView View Type
513 | //
514 | 
515 | (function() {
516 |   function r(array, index) {
517 |     return ECMAScript.IsCallable(array.get) ? array.get(index) : array[index];
518 |   }
519 | 
520 |   var IS_BIG_ENDIAN = (function() {
521 |     var u16array = new(exports.Uint16Array)([0x1234]),
522 |         u8array = new(exports.Uint8Array)(u16array.buffer);
523 |     return r(u8array, 0) === 0x12;
524 |   }());
525 | 
526 |   // Constructor(ArrayBuffer buffer,
527 |   //             optional unsigned long byteOffset,
528 |   //             optional unsigned long byteLength)
529 |   /** @constructor */
530 |   var DataView = function DataView(buffer, byteOffset, byteLength) {
531 |     if (arguments.length === 0) {
532 |       buffer = new exports.ArrayBuffer(0);
533 |     } else if (!(buffer instanceof exports.ArrayBuffer || ECMAScript.Class(buffer) === 'ArrayBuffer')) {
534 |       throw new TypeError("TypeError");
535 |     }
536 | 
537 |     this.buffer = buffer || new exports.ArrayBuffer(0);
538 | 
539 |     this.byteOffset = ECMAScript.ToUint32(byteOffset);
540 |     if (this.byteOffset > this.buffer.byteLength) {
541 |       throw new RangeError("byteOffset out of range");
542 |     }
543 | 
544 |     if (arguments.length < 3) {
545 |       this.byteLength = this.buffer.byteLength - this.byteOffset;
546 |     } else {
547 |       this.byteLength = ECMAScript.ToUint32(byteLength);
548 |     }
549 | 
550 |     if ((this.byteOffset + this.byteLength) > this.buffer.byteLength) {
551 |       throw new RangeError("byteOffset and length reference an area beyond the end of the buffer");
552 |     }
553 | 
554 |     configureProperties(this);
555 |   };
556 | 
557 |   function makeGetter(arrayType) {
558 |     return function(byteOffset, littleEndian) {
559 | 
560 |       byteOffset = ECMAScript.ToUint32(byteOffset);
561 | 
562 |       if (byteOffset + arrayType.BYTES_PER_ELEMENT > this.byteLength) {
563 |         throw new RangeError("Array index out of range");
564 |       }
565 |       byteOffset += this.byteOffset;
566 | 
567 |       var uint8Array = new exports.Uint8Array(this.buffer, byteOffset, arrayType.BYTES_PER_ELEMENT),
568 |           bytes = [], i;
569 |       for (i = 0; i < arrayType.BYTES_PER_ELEMENT; i += 1) {
570 |         bytes.push(r(uint8Array, i));
571 |       }
572 | 
573 |       if (Boolean(littleEndian) === Boolean(IS_BIG_ENDIAN)) {
574 |         bytes.reverse();
575 |       }
576 | 
577 |       return r(new arrayType(new exports.Uint8Array(bytes).buffer), 0);
578 |     };
579 |   }
580 | 
581 |   DataView.prototype.getUint8 = makeGetter(exports.Uint8Array);
582 |   DataView.prototype.getInt8 = makeGetter(exports.Int8Array);
583 |   DataView.prototype.getUint16 = makeGetter(exports.Uint16Array);
584 |   DataView.prototype.getInt16 = makeGetter(exports.Int16Array);
585 |   DataView.prototype.getUint32 = makeGetter(exports.Uint32Array);
586 |   DataView.prototype.getInt32 = makeGetter(exports.Int32Array);
587 |   DataView.prototype.getFloat32 = makeGetter(exports.Float32Array);
588 |   DataView.prototype.getFloat64 = makeGetter(exports.Float64Array);
589 | 
590 |   function makeSetter(arrayType) {
591 |     return function(byteOffset, value, littleEndian) {
592 | 
593 |       byteOffset = ECMAScript.ToUint32(byteOffset);
594 |       if (byteOffset + arrayType.BYTES_PER_ELEMENT > this.byteLength) {
595 |         throw new RangeError("Array index out of range");
596 |       }
597 | 
598 |       // Get bytes
599 |       var typeArray = new arrayType([value]),
600 |           byteArray = new exports.Uint8Array(typeArray.buffer),
601 |           bytes = [], i, byteView;
602 | 
603 |       for (i = 0; i < arrayType.BYTES_PER_ELEMENT; i += 1) {
604 |         bytes.push(r(byteArray, i));
605 |       }
606 | 
607 |       // Flip if necessary
608 |       if (Boolean(littleEndian) === Boolean(IS_BIG_ENDIAN)) {
609 |         bytes.reverse();
610 |       }
611 | 
612 |       // Write them
613 |       byteView = new exports.Uint8Array(this.buffer, byteOffset, arrayType.BYTES_PER_ELEMENT);
614 |       byteView.set(bytes);
615 |     };
616 |   }
617 | 
618 |   DataView.prototype.setUint8 = makeSetter(exports.Uint8Array);
619 |   DataView.prototype.setInt8 = makeSetter(exports.Int8Array);
620 |   DataView.prototype.setUint16 = makeSetter(exports.Uint16Array);
621 |   DataView.prototype.setInt16 = makeSetter(exports.Int16Array);
622 |   DataView.prototype.setUint32 = makeSetter(exports.Uint32Array);
623 |   DataView.prototype.setInt32 = makeSetter(exports.Int32Array);
624 |   DataView.prototype.setFloat32 = makeSetter(exports.Float32Array);
625 |   DataView.prototype.setFloat64 = makeSetter(exports.Float64Array);
626 | 
627 |   exports.DataView = exports.DataView || DataView;
628 | 
629 | }());
630 | 


--------------------------------------------------------------------------------
/javascript/test/wru.console.js:
--------------------------------------------------------------------------------
1 | /*!
2 | (C) Andrea Giammarchi, @WebReflection - Mit Style License
3 | */
4 | if(typeof global!="undefined"){var setTimeout=global.setTimeout,setInterval=global.setInterval,clearInterval=global.clearInterval,clearTimeout=global.clearTimeout;setTimeout||(function(h,c,g,a){setInterval=global.setInterval=function b(j,i){return e(j,i,g.call(arguments,2),1)};setTimeout=global.setTimeout=function d(j,i){return e(j,i,g.call(arguments,2))};clearInterval=global.clearInterval=clearTimeout=global.clearTimeout=function f(i){c[i].cancel();h.purge();delete c[i]};function e(l,k,j,i){var m=++a;c[m]=new JavaAdapter(java.util.TimerTask,{run:function(){l.apply(null,j)}});i?h.schedule(c[m],k,k):h.schedule(c[m],k);return m}})(new java.util.Timer(),{},[].slice,0)}else{!function(c,b,a,e){function d(f,g){var h=new Date;while(new Date-h<g){}f.apply(null,e.call(arguments,2))}e=a.slice;c.setTimeout=c.setInterval=d;c.clearInterval=c.clearTimeout=function(){}}(this,0,[])}var wru=function(U){function h(){w=F.call(j);if(w){if(typeof w=="function"){w={name:w[O]||"anonymous",test:w}}l(Z);l((ad(w,O)&&w[O])||(ad(w,e)&&w[e])||L);a=[];q=[];P=[];X={};b("setup");P[ae]||b("test");I||n()}else{p()}}function l(ah,ag){ah=ah+(ag?"":"\n");try{process.stdout.write(ah)}catch(af){try{require("util").print(ah)}catch(af){try{require("sys").print(ah)}catch(af){try{java.lang.System.out.print(ah)}catch(af){try{console.log(ah)}catch(af){print(ah)}}}}}}function p(){var ah=0,ag;l(g);l(Z);switch(true){case !!aa:ah++;ag="error";l(N+"   "+aa+" Errors");break;case !!z:ah++;ag="fail";l(J+g+z+" Failures");break;default:ag="pass";l(y+"      "+o+" Passes")}V.status=ag;l(Z);l(g);V.after();try{process.exit(ah)}catch(af){quit()}}function c(af){for(var ag=0,ah=af[ae];ag<ah;l("    "+(++ag)+". 
"+af[ag-1])){}}function n(){f();o+=a[ae];z+=q[ae];aa+=P[ae];if(P[ae]){S=N;c(P)}else{if(q[ae]){S=J;c(q)}else{S=y}}l(S+" passes: "+a[ae]+", fails: "+q[ae]+", errors: "+P[ae]);H=0;S=g;h()}function b(af){if(ad(w,af)){try{w[af](X)}catch(ag){W.call(P,g+ag)}}}function ad(ag,af){return m.call(ag,af)}function s(){return B()<0.5?-1:1}function f(){if(M){C(M);M=0}b("teardown")}var V={timeout:u,assert:function Q(ag,af){if(arguments[ae]==1){af=ag;ag=L}v=D;W.call(af?a:q,S+ag);return af},async:function R(ah,ak,ai,aj){var af=ai||V.timeout||(V.timeout=u);aj=++I;if(typeof ah=="function"){af=ak||V.timeout;ak=ah;ah="asynchronous test #"+aj}ai=T(function(){aj=0;W.call(q,ah);--I||(M=T(n,0))},G(af)||V.timeout);return function ag(){if(!aj){return}v=ab;S=ah+": ";try{ak.apply(this,arguments)}catch(al){v=D;W.call(P,S+al)}S=g;if(v){C(ai);--I||(M=T(n,0))}}},test:function k(af,ag){V.after=ag||function(){};j=E.apply(j,[af]);V.random&&ac.call(j,s);I||h()}},D=true,ab=!D,u=100,g=" ",L="unknown",ae="length",O="name",e="description",A="<li>",d="</li>",i="\\|/-",m=V.hasOwnProperty,S=g,Y=S.charAt,t=S.slice,j=[],E=j.concat,r=j.join,W=j.push,F=j.shift,ac=j.sort,I=0,H=0,o=0,z=0,aa=0,M=0,N="\x1B[1;31mERROR\x1B[0m",J="\x1B[0;31mFAILURE\x1B[0m",y="\x1B[0;32mOK\x1B[0m",Z="------------------------------",x,G,B,T,C,w,K,a,q,P,X,v;V.log=function(ah,ag){try{if(ag){throw new Error}console.log(ah)}catch(af){l(ah,0)}};if(typeof __dirname!="undefined"){U.wru=V;U.assert=V.assert;U.async=V.async;U.test=V.test;U.log=V.log;U.random=false;Object.defineProperty(U,"status",{get:function(){return V.status}});Object.defineProperty(U,"timeout",{get:function(){return V.timeout},set:function(af){V.timeout=parseInt(af,10)||V.timeout}});U=global}x=U.Math;G=x.abs;B=x.random;T=U.setTimeout;C=U.clearTimeout;U.setInterval(function(){I&&l(g+Y.call(i,H++%4)+"\b\b",true)},u);undefined;u*=u;V.random=ab;return V}(this);


--------------------------------------------------------------------------------
/javascript/test/wru.min.js:
--------------------------------------------------------------------------------
1 | /*!
2 | (C) Andrea Giammarchi, @WebReflection - Mit Style License
3 | */
4 | var wru=function(Y){function j(){A=K.call(m);if(A){if(typeof A=="function"){A={name:A[S]||"anonymous",test:A}}(P=l(l(Z.node,"div"),"span"))[E]=((ag(A,S)&&A[S])||(ag(A,e)&&A[e])||Q)+i+i;a=[];u=[];T=[];ab={};b("setup");T[ah]||b("test");N||r()}else{t()}}function p(aj){try{return O.call(h,aj)}catch(ai){return h.createElement(aj)}}function l(ai,aj){return ai.appendChild(p(aj))}function g(ai){P[E]=x.call(P[E],0,-2)+i+ai}function t(){var ak=Z.node.insertBefore(p("div"),Z.node.firstChild),al,aj,ai;if(ad){ai=aj="error";al="There Are Errors: "+ad}else{if(C){ai=aj="fail";al=C+" Tests Failed"}else{ai=aj="pass";al="Passed "+s+" Tests"}}Z.status=ai;ak[E]="<strong>"+al+"</strong>";ak.className=aj}function G(){var ai=this.lastChild.style;ai.display=ai.display=="none"?"block":"none"}function c(ai){P[E]+="<ul>"+D+v.call(ai,d+D)+d+"</ul>";(P.onclick=G).call(P)}function r(){f();s+=a[ah];C+=u[ah];ad+=T[ah];g("("+v.call([a[ah],M=u[ah],T[ah]],", ")+")");P=P.parentNode;T[ah]?c(T,W="error"):(M?c(u,W="fail"):W="pass");P.className=W;M=0;W=i;j()}function b(ai){if(ag(A,ai)){try{A[ai](ab)}catch(aj){aa.call(T,i+aj)}}}function ag(aj,ai){return q.call(aj,ai)}function w(){return F()<0.5?-1:1}function f(){if(R){H(R);R=0}b("teardown")}var Z={timeout:y,assert:function U(aj,ai){if(arguments[ah]==1){ai=aj;aj=Q}z=I;aa.call(ai?a:u,W+aj);return ai},async:function V(ak,an,al,am){var ai=al||Z.timeout||(Z.timeout=y);am=++N;if(typeof ak=="function"){ai=an||Z.timeout;an=ak;ak="asynchronous test #"+am}al=X(function(){am=0;aa.call(u,ak);--N||(R=X(r,0))},L(ai)||Z.timeout);return function aj(){if(!am){return}z=ae;W=ak+": ";try{an.apply(this,arguments)}catch(ao){z=I;aa.call(T,W+ao)}W=i;if(z){H(al);--N||(R=X(r,0))}}},test:function n(ai,aj){Z.after=aj||function(){};m=J.apply(m,[ai]);Z.random&&af.call(m,w);N||j()}},I=true,ae=!I,y=100,i=" 
",Q="unknown",ah="length",S="name",e="description",D="<li>",d="</li>",k="\\|/-",q=Z.hasOwnProperty,W=i,ac=W.charAt,x=W.slice,m=[],J=m.concat,v=m.join,aa=m.push,K=m.shift,af=m.sort,N=0,M=0,s=0,C=0,ad=0,R=0,E="innerHTML",h=Y.document,O=h.createElement,B,L,F,X,H,A,P,a,u,T,ab,z;B=Y.Math;L=B.abs;F=B.random;X=Y.setTimeout;H=Y.clearTimeout;Z.node=(h.getElementById("wru")||h.body||h.documentElement);Y.setInterval(function(){N&&g(ac.call(k,M++%4))},y);undefined;Z.log=function o(aj,ai){ai?alert(aj):(typeof console!="undefined")&&console.log(aj)};y*=y;Z.random=ae;return Z}(this);


--------------------------------------------------------------------------------
/javascript/unibinary.js:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * UniBinary - Encodes and decodes data into printable UniCode characters.
  3 |  *
  4 |  * Authors:
  5 |  * Nicolas Seriot, 2013-01-17
  6 |  * Toolsley, 2014-12-03 (JavaScript port)
  7 |  *
  8 |  * License: BSD
  9 |  *
 10 |  */
 11 | (function (root, factory) {
 12 |     if (typeof define === 'function' && define.amd) {
 13 |         define([], factory);
 14 |     } else if (typeof exports === 'object') {
 15 |         module.exports = factory();
 16 |     } else {
 17 |         root.unibinary = factory();
 18 |   }
 19 | }(this, function () {
 20 | 
 21 | //encodes ascii characters (7 bits)
 22 |     var U12a_0_0_start = 0x5E00; // CJK Unified Ideographs (subset) - encodes 12 bits (2 ascii) - MSB 0,0
 23 |     var U12a_0_1_start = 0x6E00; // CJK Unified Ideographs (subset) - encodes 12 bits (2 ascii) - MSB 0,1
 24 |     var U12a_1_0_start = 0x7E00; // CJK Unified Ideographs (subset) - encodes 12 bits (2 ascii) - MSB 1,0
 25 |     var U12a_1_1_start = 0x8E00; // CJK Unified Ideographs (subset) - encodes 12 bits (2 ascii) - MSB 1,1
 26 |     var U12a_length = 0x1000;
 27 | 
 28 | //encodes arbitrary bits
 29 |     var U12b_start = 0x4E00; // CJK Unified Ideographs (subset) - encodes 12 bits
 30 |     var U12b_length = 0x1000;
 31 |     var U8_start = 0x0400;   // Cyrillic                        - encodes 8 bits
 32 |     var U8_length = 0x100;
 33 | 
 34 |     var two_unichr_to_repeat_byte_ntimes = function (b, n) {
 35 | 
 36 |         if (n > 0xFFF) throw new Error("ValueError");
 37 | 
 38 |         if (b > 0xFF) throw new Error("ValueError");
 39 | 
 40 |         var uni_b = String.fromCharCode(U8_start + b);
 41 |         var uni_r = String.fromCharCode(U12b_start + n);
 42 | 
 43 |         return uni_b + uni_r;
 44 | 
 45 |     }
 46 | 
 47 |     var unichr_12a_from_two_ascii = function (a1, a2) {
 48 | 
 49 |         var i1 = a1;
 50 |         var i2 = a2;
 51 | 
 52 |         var unicode_start = null;
 53 | 
 54 |         if ((i1 < 64) && (i2 < 64)) {
 55 |             unicode_start = U12a_0_0_start;
 56 |         } else if ((i1 < 64) && (i2 >= 64)) {
 57 |             i2 -= 64;
 58 |             unicode_start = U12a_0_1_start;
 59 |         } else if ((i1 >= 64) && (i2 < 64)) {
 60 |             i1 -= 64;
 61 |             unicode_start = U12a_1_0_start;
 62 |         } else if ((i1 >= 64) && (i2 >= 64)) {
 63 |             i1 -= 64;
 64 |             i2 -= 64;
 65 |             unicode_start = U12a_1_1_start;
 66 |         }
 67 | 
 68 |         return String.fromCharCode(unicode_start + (i1 << 6) + i2)
 69 |     }
 70 | 
 71 |     var unichr_08_from_int = function (i) {
 72 |         if (i > (U8_start + U8_length)) {
 73 |             console.error("-- unichr_08_from_int: 0x" + i.toString(16));
 74 |             throw new Error("ValueError");
 75 |         }
 76 | 
 77 |         return String.fromCharCode(U8_start + i);
 78 |     }
 79 | 
 80 |     var unichr_12_from_int = function (i) {
 81 |         if (i > (U12b_start + U12b_length)) {
 82 |             console.error("-- unichr_12_from_int: 0x" + i.toString(16));
 83 |             throw new Error("ValueError");
 84 |         }
 85 | 
 86 |         return String.fromCharCode(U12b_start + i);
 87 |     }
 88 | 
 89 |     var int_from_u08b = function (u) {
 90 |         i = u.charCodeAt(0);
 91 |         if ((i < U8_start) || (i > (U8_start + U8_length))) {
 92 |             console.error("-- int_from_u8: " + u.toString());
 93 |             throw new Error("ValueError");
 94 |         }
 95 | 
 96 |         return i - U8_start;
 97 |     }
 98 | 
 99 |     var two_bytes_from_u12a = function (u) {
100 |         var i1 = null;
101 |         var i2 = null;
102 |         var unicode_start = null;
103 |         i = u.charCodeAt(0);
104 | 
105 |         for (var j = 0, start; start = [U12a_0_0_start, U12a_0_1_start, U12a_1_0_start, U12a_1_1_start][j]; j++) {
106 |             if ((i >= start) && (i < (start + U12a_length)))
107 |                 unicode_start = start;
108 |         }
109 | 
110 |         if (!unicode_start) {
111 |             console.error("-- two_bytes_from_u12a ord=0x" + u.charCodeAt(0));
112 |             throw new Error("ValueError");
113 |         }
114 | 
115 |         var value = i - unicode_start;
116 |         var b0 = (value & 0xFC0) >> 6;
117 |         var b1 = i & 0x3F;
118 | 
119 |         switch (unicode_start) {
120 |             case U12a_0_1_start:
121 |                 b1 += 64;
122 |                 break;
123 |             case U12a_1_0_start:
124 |                 b0 += 64;
125 |                 break;
126 |             case U12a_1_1_start:
127 |                 b0 += 64;
128 |                 b1 += 64;
129 |         }
130 | 
131 |         return [b0, b1]
132 | 
133 |     }
134 | 
135 |     var int_from_u12b = function (u) {
136 |         var i = u.charCodeAt(0);
137 |         if ((i < U12b_start) || ( i > (U12b_start + U12b_length))) {
138 |             console.error("-- int_from_u12b: " + u);
139 |             throw new Error("ValueError");
140 |         }
141 | 
142 |         return i - U12b_start;
143 |     }
144 | 
145 | 
146 |     var two_twelve_bits_values_from_three_bytes = function (a, b, c) {
147 |         // (0x12, 0x34, 0x56) -> (0x123, 0x456)
148 |         if ((a > 0xFF) || (b > 0xFF) || (c > 0xFF))
149 |             throw new Error("ValueError");
150 | 
151 |         var s1 = (a << 4) + (b >> 4);
152 |         var s2 = ((b & 0xF) << 8) + c;
153 | 
154 |         return [s1 , s2];
155 |     }
156 | 
157 | 
158 |     var three_bytes_from_two_twelve_bits_values = function (i1, i2) {
159 |         // (0x123, 0x456) -> (0x12, 0x34, 0x56)
160 |         if ((i1 > 0xFFF) || (i2 > 0xFFF))
161 |             throw new Error("ValueError");
162 | 
163 |         var b1 = i1 >> 4;
164 |         var b2 = ((i1 & 0xF) << 4) + ((i2 & 0xF00) >> 8);
165 |         var b3 = i2 & 0x0FF;
166 | 
167 |         return [b1, b2, b3];
168 | 
169 |     }
170 | 
171 |     var number_of_left_instances_from_index = function (l, index) {
172 |         var i = index;
173 |         var c = 0;
174 |         var x = l[i];
175 | 
176 |         while (i < l.length) {
177 |             if (l[i] == x) {
178 |                 c += 1;
179 |             } else {
180 |                 break;
181 |             }
182 |             i += 1;
183 |         }
184 |         return c;
185 | 
186 |     }
187 | 
188 |     var three_bytes_from_unichars = function (u1, u2) {
189 |         var i1 = int_from_u12b(u1);
190 |         var i2 = int_from_u12b(u2);
191 |         return three_bytes_from_two_twelve_bits_values(i1, i2);
192 |     }
193 | 
194 |     var repeated_bytes_from_unichars = function (u1, u2) {
195 |         var b = int_from_u08b(u1);
196 |         var n = int_from_u12b(u2);
197 |         var r = [];
198 |         for (var i = 0; i < n; i++) r.push(b);
199 |         return r;
200 |     }
201 | 
202 | 
203 |     var two_bytes_from_unichars = function (u1, u2) {
204 |         var b1 = int_from_u08b(u1);
205 |         var b2 = int_from_u08b(u2);
206 |         return [b1, b2];
207 |     }
208 | 
209 | 
210 |     var is_in_U12a = function (u) {
211 |         var i = u.charCodeAt(0);
212 | 
213 |         for (var j = 0, start; start = [U12a_0_0_start, U12a_0_1_start, U12a_1_0_start, U12a_1_1_start][j]; j++) {
214 |             if ((i >= start) && (i < (start + U12a_length)))
215 |                 return true;
216 |         }
217 |         return false;
218 | 
219 | 
220 |     }
221 | 
222 |     var is_in_U8b = function (u) {
223 |         var i = u.charCodeAt(0);
224 |         return ((i >= U8_start) && (i < (U8_start + U8_length)));
225 |     }
226 | 
227 |     var is_in_U12b = function (u) {
228 |         var i = u.charCodeAt(0);
229 |         return ((i >= U12b_start) && (i < (U12b_start + U12b_length)));
230 |     }
231 | 
232 |     var bytes_from_u1_u2 = function (u1, u2) {
233 |         var u1_in_U12 = is_in_U12b(u1);
234 |         var u2_in_U12 = is_in_U12b(u2);
235 | 
236 |         var u1_in_U8 = is_in_U8b(u1);
237 |         var u2_in_U8 = is_in_U8b(u2);
238 | 
239 |         if (u1_in_U12 && u2_in_U12)
240 |             return three_bytes_from_unichars(u1, u2)
241 |         else if (u1_in_U8 && u2_in_U12)
242 |             return repeated_bytes_from_unichars(u1, u2)
243 |         else if (u1_in_U8 && u2_in_U8)
244 |             return two_bytes_from_unichars(u1, u2)
245 |         else {
246 |             console.error("--" + u1 + " " + u2 + " " + u1.charCodeAt(0).toString(16) + " " + u2.charCodeAt(0).toString(16));
247 |             throw new Error("ValueError");
248 |         }
249 | 
250 |     }
251 | 
252 |     var gen_encode_unichars_from_bytes = function (bytes) {
253 |         var i = 0;
254 | 
255 |         var result = "";
256 | 
257 |         while (i < bytes.length) {
258 |             var r = number_of_left_instances_from_index(bytes, i);
259 | 
260 |             if (r >= 3) {
261 |                 // read N bytes | N >= 3 and N < 0x1000, encode as 2 unichar
262 |                 if (r >= 0x1000) {
263 |                     r = 0xFFF
264 |                 }
265 | 
266 |                 result += two_unichr_to_repeat_byte_ntimes(bytes[i], r);
267 | 
268 |                 i += r;
269 |             } else {
270 |                 var two_ascii_chars_available = bytes.length >= i + 2 && bytes[i] < 128 && bytes[i + 1] < 128;
271 | 
272 |                 if (two_ascii_chars_available) {
273 |                     //read 2 x 7 bits, encode 1 unichar
274 |                     result += unichr_12a_from_two_ascii(bytes[i], bytes[i + 1]);
275 |                     i += 2;
276 |                 } else if (bytes.length >= i + 3) {
277 |                     // read 3 bytes, encode 2 unichars
278 | 
279 |                     var s = two_twelve_bits_values_from_three_bytes(bytes[i], bytes[i + 1], bytes[i + 2]);
280 |                     result += unichr_12_from_int(s[0])+unichr_12_from_int(s[1]);
281 |                     i += 3;
282 |                 } else {
283 |                     // read 1 byte, encode 1 unichar
284 | 
285 |                     result += unichr_08_from_int(bytes[i]);
286 |                     i += 1;
287 |                 }
288 | 
289 | 
290 |             }
291 | 
292 |         }
293 |         return result;
294 | 
295 |     }
296 | 
297 |     var gen_decode_bytes_from_string = function (s) {
298 |         var i = 0;
299 | 
300 | 
301 |         // strip linebreaks
302 |         s = s.replace(/(\r\n|\n|\r)/gm,"");
303 | 
304 |         //first pass determine size
305 | 
306 |         var bufferSize = 0;
307 | 
308 |         while (i < s.length) {
309 |             if (s[i] == '\n') {
310 |                 i += 1;
311 |                 continue;
312 |             }
313 | 
314 |             if (is_in_U12a(s[i])) {
315 |                 // 1 U12a -> read 2 ascii characters
316 |                 //var bytes = two_bytes_from_u12a(s[i])
317 |                 i += 1;
318 |                 bufferSize += 2;
319 |             } else if ((i + 1) < s.length) {
320 |                 // (U12b, U12b) -> read 3 bytes
321 |                 // (U8b, U12b) -> read repetition
322 |                 // (U8b, U8b) -> read 1 byte, 1 byte
323 |                 var u1 = s[i];
324 |                 i += 1;
325 | 
326 |                 while (s[i] == '\n') {
327 |                     i += 1;
328 |                 }
329 | 
330 |                 var u2 = s[i];
331 |                 i += 1;
332 | 
333 |                 bytes = bytes_from_u1_u2(u1, u2)
334 |                 bufferSize += bytes.length;
335 |             } else if (is_in_U8b(s[i])) {
336 |                 // 1 U8b -> read 1 byte
337 |                 //var b = int_from_u08b(s[i]);
338 |                 i += 1;
339 |                 //return [b];
340 |                 bufferSize += 1;
341 |             } else {
342 |                 console.error("cannot decode " + s);
343 |             }
344 | 
345 |         }
346 | 
347 |         var result = new Uint8Array(bufferSize);
348 |         var resultLoc = 0;
349 | 
350 |         i = 0;
351 |         while (i < s.length) {
352 |             if (s[i] == '\n') {
353 |                 i += 1;
354 |                 continue;
355 |             }
356 | 
357 |             if (is_in_U12a(s[i])) {
358 |                 // 1 U12a -> read 2 ascii characters
359 |                 var bytes = two_bytes_from_u12a(s[i])
360 |                 i += 1;
361 |                 result[resultLoc] = bytes[0];
362 |                 result[resultLoc + 1] = bytes[1];
363 |                 resultLoc += 2;
364 | 
365 |             } else if ((i + 1) < s.length) {
366 |                 // (U12b, U12b) -> read 3 bytes
367 |                 // (U8b, U12b) -> read repetition
368 |                 // (U8b, U8b) -> read 1 byte, 1 byte
369 |                 var u1 = s[i];
370 |                 i += 1;
371 | 
372 |                 while (s[i] == '\n') {
373 |                     i += 1;
374 |                 }
375 | 
376 |                 var u2 = s[i];
377 |                 i += 1;
378 | 
379 |                 bytes = bytes_from_u1_u2(u1, u2)
380 |                 for (var j = 0; j < bytes.length; j++) {
381 |                     result[resultLoc + j] = bytes[j];
382 |                 }
383 |                 resultLoc += bytes.length;
384 | 
385 |             } else if (is_in_U8b(s[i])) {
386 |                 // 1 U8b -> read 1 byte
387 |                 var b = int_from_u08b(s[i]);
388 |                 i += 1;
389 | 
390 |                 result[resultLoc] = b;
391 |                 resultLoc++;
392 |             } else {
393 |                 console.error("cannot decode " + s);
394 |             }
395 | 
396 |         }
397 | 
398 |         return result;
399 | 
400 |     }
401 | 
402 |     var decodeString = function (encoded) {
403 | 
404 |         var resultArray = gen_decode_bytes_from_string(encoded);
405 | 
406 |         var encodedString = "";
407 |         
408 | 	for(var i=0;i<resultArray.length;i++) encodedString+=String.fromCharCode(resultArray[i]);
409 | 
410 |         return decodeURIComponent(escape(encodedString));
411 | 
412 |     }
413 | 
414 |     var encodeString = function (str) {
415 | 
416 |         var utf8 = [];
417 |         for (var i = 0; i < str.length; i++) {
418 |             var charcode = str.charCodeAt(i);
419 |             if (charcode < 0x80) utf8.push(charcode);
420 |             else if (charcode < 0x800) {
421 |                 utf8.push(0xc0 | (charcode >> 6),
422 |                         0x80 | (charcode & 0x3f));
423 |             }
424 |             else if (charcode < 0xd800 || charcode >= 0xe000) {
425 |                 utf8.push(0xe0 | (charcode >> 12),
426 |                         0x80 | ((charcode >> 6) & 0x3f),
427 |                         0x80 | (charcode & 0x3f));
428 |             }
429 |             // surrogate pair
430 |             else {
431 |                 i++;
432 |                 // UTF-16 encodes 0x10000-0x10FFFF by
433 |                 // subtracting 0x10000 and splitting the
434 |                 // 20 bits of 0x0-0xFFFFF into two halves
435 |                 charcode = 0x10000 + (((charcode & 0x3ff) << 10)
436 |                     | (str.charCodeAt(i) & 0x3ff))
437 |                 utf8.push(0xf0 | (charcode >> 18),
438 |                         0x80 | ((charcode >> 12) & 0x3f),
439 |                         0x80 | ((charcode >> 6) & 0x3f),
440 |                         0x80 | (charcode & 0x3f));
441 |             }
442 |         }
443 | 
444 |         return gen_encode_unichars_from_bytes(utf8);
445 | 
446 |     }
447 | 
448 |     if (typeof UNIBINARY_UNIT_TESTING == 'undefined') {
449 | 
450 | 	    return {
451 | 	        encode: gen_encode_unichars_from_bytes,
452 | 	        decode: gen_decode_bytes_from_string,
453 | 	        encodeString: encodeString,
454 | 	        decodeString: decodeString
455 | 	    }
456 | 
457 |     } else {
458 | 
459 |         return {
460 |             encode: gen_encode_unichars_from_bytes,
461 |             decode: gen_decode_bytes_from_string,
462 |             encodeString: encodeString,
463 |             decodeString: decodeString,
464 |             two_unichr_to_repeat_byte_ntimes:two_unichr_to_repeat_byte_ntimes,
465 |             unichr_12a_from_two_ascii:unichr_12a_from_two_ascii,
466 |             unichr_08_from_int:unichr_08_from_int,
467 |             unichr_12_from_int:unichr_12_from_int,
468 |             int_from_u08b:int_from_u08b,
469 |             two_bytes_from_u12a:two_bytes_from_u12a,
470 |             int_from_u12b:int_from_u12b,
471 |             two_twelve_bits_values_from_three_bytes:two_twelve_bits_values_from_three_bytes,
472 |             three_bytes_from_two_twelve_bits_values:three_bytes_from_two_twelve_bits_values,
473 |             number_of_left_instances_from_index:number_of_left_instances_from_index,
474 |             three_bytes_from_unichars:three_bytes_from_unichars,
475 |             repeated_bytes_from_unichars:repeated_bytes_from_unichars,
476 |             two_bytes_from_unichars:two_bytes_from_unichars,
477 |             is_in_U12a:is_in_U12a,
478 |             is_in_U8b:is_in_U8b,
479 |             is_in_U12b:is_in_U12b,
480 |             bytes_from_u1_u2:bytes_from_u1_u2,
481 |             U8_start:U8_start,
482 |             U8_length:U8_length,
483 |             U12b_start:U12b_start,
484 |             U12b_length:U12b_length
485 |         }
486 |             
487 |     }
488 | }));
489 | 
490 | 


--------------------------------------------------------------------------------
/javascript/unibinary_tool.js:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env node
 2 | 
 3 | /**
 4 |  * node.js command line tool for unibinary.js
 5 |  *
 6 |  * Author:
 7 |  * Toolsley, 2014-12-03
 8 |  *
 9 |  * License: BSD
10 |  *
11 |  */
12 | 
13 | var fs = require('fs');
14 | var constants = require('constants');
15 | var unibinary = require('./unibinary');
16 | 
/**
 * Copy an indexable byte container (such as a node Buffer) into a new ArrayBuffer.
 *
 * @param buffer source with a `length` and numeric indices
 * @returns a new ArrayBuffer holding the same bytes
 */
function toArrayBuffer(buffer) {
    var size = buffer.length;
    var arrayBuffer = new ArrayBuffer(size);
    var target = new Uint8Array(arrayBuffer);
    var pos = 0;

    while (pos < size) {
        target[pos] = buffer[pos];
        pos += 1;
    }

    return arrayBuffer;
}
25 | 
/**
 * Copy a byte view (such as a Uint8Array) into a new node Buffer.
 *
 * Uses Buffer.from() instead of the deprecated `new Buffer(size)` constructor.
 *
 * @param view array or typed array of byte values
 * @returns a new Buffer containing a copy of the bytes
 */
function toBuffer(view) {
    return Buffer.from(view);
}
33 | 
// Command dispatch: process.argv[2] is the option, process.argv[3] its operand.
// An option without its operand cannot be executed, so it is treated like an
// unknown option and the usage text is printed instead of crashing.
var command = process.argv[3] === undefined ? undefined : process.argv[2];

switch (command) {
    case "-es":
    case "--encode_string":
        console.log(unibinary.encodeString(process.argv[3]));
        break;
    case "-ds":
    case "--decode_string":
        // print only the decoded text (the raw input used to be echoed first)
        console.log(unibinary.decodeString(process.argv[3]));
        break;
    case "-e":
    case "--encode":
        // read the file as raw bytes and write the encoded text to stdout
        fs.readFile(process.argv[3],function (err, data) {
            if (err) throw err;
            var dataArray = new Uint8Array(toArrayBuffer(data));
            process.stdout.write(unibinary.encode(dataArray));
        });
        break;
    case "-d":
    case "--decode":
        // read the file as UTF-8 text and write the decoded bytes to stdout
        fs.readFile(process.argv[3],"utf-8",function (err, data) {
            if (err) throw err;
            var dataBuf = toBuffer(unibinary.decode(data));
            process.stdout.write(dataBuf);
        });
        break;
    default:

        console.log("usage: unibinary_tool.js [-h] [-e ENCODE] [-d DECODE] [-es ENCODE_STRING]\n\
                         [-ds DECODE_STRING]\n\
        \n\
        UniBinary encodes and decodes data into printable Unicode characters.\n\
        \n\
        optional arguments:\n\
        -h, --help            show this help message and exit\n\
        -e ENCODE, --encode ENCODE\n\
            file to encode\n\
        -d DECODE, --decode DECODE\n\
            file to decode\n\
        -es ENCODE_STRING, --encode_string ENCODE_STRING\n\
            utf-8 string to encode\n\
        -ds DECODE_STRING, --decode_string DECODE_STRING\n\
            utf-8 string to decode\n");
}


--------------------------------------------------------------------------------
/python/ub_profile.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # Nicolas Seriot, 2013-01-17
 3 | 
 4 | """
 5 | UniBinary profiling
 6 | 
 7 | $ python ub_profile.py
 8 | """
 9 | 
10 | from unibinary import *
11 | import hotshot
12 | from hotshot import stats
13 | 
def profile_encode_file():
    """Encode a hard-coded input file and write the result to /tmp/tmp.txt as UTF-16.

    Both files are opened with ``with`` so they are closed even when reading
    or encoding raises.
    """
    #f = open("/usr/bin/true", "rb")
    with open("/Users/nst/Desktop/sc.png", "rb") as f: # any file ~ 800 KB
        data = f.read()

    with codecs.open("/tmp/tmp.txt", 'w', encoding='utf-16') as f:
        for unichars in gen_encode_unichars_from_bytes(data):
            for u in unichars:
                f.write(u)
25 | 
def profile_decode_file():
    """Decode /tmp/tmp.txt (UTF-16 text) and write the bytes to /tmp/tmp.bin.

    Both files are opened with ``with`` so they are closed even when decoding
    raises.
    """
    with codecs.open("/tmp/tmp.txt", "r", encoding='utf-16') as f:
        s = f.read()

    with open("/tmp/tmp.bin", 'wb') as f:
        for chunk in gen_decode_bytes_from_string(s):
            for b in chunk:
                # each decoded value is written as one unsigned byte
                f.write(struct.pack("B", b))
37 | 
if __name__ == '__main__':

    # Profile encoding first, then decoding; each run writes its hotshot data
    # to the same file and prints the top 20 entries sorted by time, then calls.
    stats_path = "hotshot_stats.prof"

    for target in (profile_encode_file, profile_decode_file):

        profiler = hotshot.Profile(stats_path)
        profiler.runcall(target)
        profiler.close()

        report = stats.load(stats_path)
        report.strip_dirs()
        report.sort_stats('time', 'calls')
        report.print_stats(20)
50 | 


--------------------------------------------------------------------------------
/python/ub_test.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # Nicolas Seriot, 2013-01-17
  3 | 
  4 | """
  5 | UniBinary tests
  6 | 
$ python ub_test.py
  8 | """
  9 | 
 10 | from unibinary import *
 11 | import unittest
 12 | 
 13 | def shasum(filename):
 14 |     m = hashlib.sha1()
 15 |     with open(filename,'rb') as f: 
 16 |         for chunk in iter(lambda: f.read(128*m.block_size), b''): 
 17 |              m.update(chunk)
 18 |     return m.hexdigest()
 19 | 
 20 | class TestUnidata(unittest.TestCase):
 21 | 
 22 |     #def setUp(self):
 23 |     #    pass
 24 | 
 25 |     #def tearDown(self):
 26 |     #    pass
 27 |     
 28 |     def test_unichr_12_encoding_decoding(self):
 29 |     
 30 |         for i in [0x0, 0x1, 0xAB, 0x123, 0xABC, 0xF, 0xFF, 0xFFF]:
 31 |     
 32 |             u = unichr_12_from_int(i)
 33 |             self.assertNotEqual(i, u)
 34 |             
 35 |             i2 = int_from_u12b(u)
 36 |             self.assertEqual(i, i2)
 37 |     
 38 |     def test_3_to_2_bytes(self):
 39 | 
 40 |         (a, b) = two_twelve_bits_values_from_three_bytes(0x12, 0x34, 0x56)
 41 |         
 42 |         self.assertEqual(a, 0x123, "0x%x" % a)
 43 |         self.assertEqual(b, 0x456, "0x%x" % b)
 44 |     
 45 |     def test_2_to_3_bytes(self):
 46 |         
 47 |         (a,b,c) = three_bytes_from_two_twelve_bits_values(0x123, 0x456)
 48 | 
 49 |         self.assertEqual(a, 0x12, "0x%x" % a)
 50 |         self.assertEqual(b, 0x34, "0x%x" % b)
 51 |         self.assertEqual(c, 0x56, "0x%x" % c)
 52 | 
 53 |     def test_encode_3_bytes(self):
 54 |         bytes = "\xab\xcd\xef"
 55 |         
 56 |         gen = gen_encode_unichars_from_bytes(bytes)
 57 |         
 58 |         (u1, u2) = gen.next()
 59 |         
 60 |         self.assertFalse(list(gen))        
 61 | 
 62 |         self.assertEqual(u1, unichr(U12b_start + 0xABC))
 63 |         self.assertEqual(u2, unichr(U12b_start + 0xDEF))
 64 |     
 65 |     def test_encode_bytes(self):
 66 |         bytes = "\xab\xcd\xef\xff"
 67 |         
 68 |         gen = gen_encode_unichars_from_bytes(bytes)
 69 |         
 70 |         (u1, u2) = gen.next()
 71 |         u3 = gen.next()
 72 |         
 73 |         self.assertFalse(list(gen))        
 74 |         
 75 |         self.assertEqual(u1, unichr(U12b_start + 0xABC))
 76 |         self.assertEqual(u2, unichr(U12b_start + 0xDEF))
 77 |         self.assertEqual(u3, unichr(U8_start + 0xFF))
 78 |     
 79 |     def test_decode_unichars(self):
 80 |         
 81 |         u1 = unichr(U12b_start + 0xABC)
 82 |         u2 = unichr(U12b_start + 0xDEF)
 83 |         
 84 |         s = u''
 85 |         s += u1
 86 |         s += u2
 87 |         
 88 |         gen = gen_decode_bytes_from_string(s)
 89 |                
 90 |         (a,b,c) = gen.next()
 91 |         
 92 |         self.assertFalse(list(gen))        
 93 | 
 94 |         self.assertEqual(a, 0xAB)
 95 |         self.assertEqual(b, 0xCD)
 96 |         self.assertEqual(c, 0xEF)
 97 |     
 98 |     def test_is_in_U8b(self):
 99 |         self.assertFalse(is_in_U8b(u"\u03FF"))
100 |         
101 |         self.assertTrue(is_in_U8b(u"\u0400"))
102 |         self.assertTrue(is_in_U8b(u"\u04FF"))
103 | 
104 |         self.assertFalse(is_in_U8b(u"\u0500"))
105 |         
106 |     def test_encoding_decoding_utf16_file(self):
107 |         
108 |         src = "/usr/bin/true"
109 |         tmp = "/tmp/true.txt"
110 |         cpy = "/tmp/true"
111 |         
112 |         import os
113 |         if not os.path.exists(src):
114 |             print "-- WARNING: cannot test %s, file does not exist" % src
115 |             return
116 |         
117 |         for e in ['utf-8', 'utf-16']:
118 | 
119 |             f = open(src, "rb")
120 |             bytes = f.read()
121 |             f.close()
122 |         
123 |             f = codecs.open(tmp, 'w', encoding=e)
124 |             
125 |             for unichars in gen_encode_unichars_from_bytes(bytes):
126 |                 for u in unichars:
127 |                     f.write(u)
128 |             
129 |             f.close()
130 |             
131 |             ##
132 |             
133 |             f = codecs.open(tmp, 'r', encoding=e)
134 |             s = f.read()
135 |             f.close()
136 |             
137 |             f = open(cpy, 'wb')
138 |             for bytes in gen_decode_bytes_from_string(s):
139 |                 for b in bytes:
140 |                     buf = struct.pack("B", b)
141 |                     f.write(buf)
142 |             f.close()
143 |             
144 |             shasum_src = shasum(src)
145 |             shasum_cpy = shasum(cpy)
146 |             
147 |             self.assertEqual(shasum_src, shasum_cpy)
148 | 
149 |     def test_unichr_12a_from_two_ascii(self):
150 |         u = unichr_12a_from_two_ascii('Z', 'E')
151 |         self.assertEqual(u, u"\u9485")
152 | 
153 |         u = unichr_12a_from_two_ascii('z', ',')
154 |         self.assertEqual(u, u"\u8CAC")
155 | 
156 |     def test_ascii_characters_encoding(self):
157 |         s = "abc"
158 |         
159 |         gen = gen_encode_unichars_from_bytes(s)
160 |         
161 |         u0 = gen.next()
162 |         u1 = gen.next()
163 |         
164 |         self.assertFalse(list(gen))        
165 | 
166 |         self.assertEqual(u0, u"\u9662")
167 |         self.assertEqual(u1, u"\u0463")
168 | 
169 |     def test_ascii_characters_encoding_2(self):
170 |     
171 |         s = "ZE"
172 |         
173 |         gen = gen_encode_unichars_from_bytes(s)
174 |         
175 |         u0 = gen.next()
176 |         
177 |         self.assertFalse(list(gen))        
178 | 
179 |         self.assertEqual(u0, unichr_12a_from_two_ascii('Z', 'E'))
180 | 
181 |     def test_two_unichr_to_repeat_byte_ntimes_aaa(self):
182 |         (uni_b, uni_r) = two_unichr_to_repeat_byte_ntimes(ord('a'), 10)
183 |                 
184 |         self.assertEqual(ord(uni_b), 0x0461)
185 |         self.assertEqual(ord(uni_r), 0x4E0A)
186 | 
187 |     def test_two_unichr_to_repeat_byte_ntimes_xxx(self):
188 |         (uni_b, uni_r) = two_unichr_to_repeat_byte_ntimes(ord('x'), 3)
189 |         
190 |         self.assertEqual(ord(uni_b), 0x0478)
191 |         self.assertEqual(ord(uni_r), 0x4E03)
192 |     
193 |     def test_repeat(self):
194 |     
195 |         s = "xxx"
196 |         
197 |         gen = gen_encode_unichars_from_bytes(s)
198 |         
199 |         (u0, u1) = gen.next()
200 |                 
201 |         self.assertFalse(list(gen))        
202 | 
203 |         print "%x %x" % (ord(u0), ord(u1))
204 | 
205 |         self.assertEqual(ord(u0), 0x0478)
206 |         self.assertEqual(ord(u1), 0x4E03)
207 |         
208 |     def test_ascii_characters_decoding(self):
209 |     
210 |         s = [u"\u9662", u"\u0463"]
211 |         
212 |         s2 = []
213 |         for chunks in gen_decode_bytes_from_string(s):
214 |             for b in chunks:
215 |                 s2.append(b)
216 | 
217 |         self.assertEqual(s2[0], ord('a'))
218 |         self.assertEqual(s2[1], ord('b'))
219 |         self.assertEqual(s2[2], ord('c'))
220 |     
221 |     def test_ascii_characters_decoding_2(self):
222 |     
223 |         s = [u"\u9485"]
224 |         
225 |         s2 = []
226 |         for chunks in gen_decode_bytes_from_string(s):
227 |             for b in chunks:
228 |                 s2.append(b)
229 |         
230 |         self.assertEqual(s2[0], ord('Z'))
231 |         self.assertEqual(s2[1], ord('E'))
232 | 
233 |     def test_five_bytes_encoding(self):
234 |         bytes = "\xab\xcd\xef\xab\xcd"
235 |         gen = gen_encode_unichars_from_bytes(bytes)
236 |         
237 |         (u1, u2) = gen.next()
238 |         (u3) = gen.next()
239 |         (u4) = gen.next()
240 |         
241 |         self.assertFalse(list(gen))        
242 | 
243 |         self.assertEqual(u1, unichr(U12b_start + 0xABC))
244 |         self.assertEqual(u2, unichr(U12b_start + 0xDEF))
245 |         self.assertEqual(u3, unichr_08_from_int(0xAB))
246 |         self.assertEqual(u4, unichr_08_from_int(0xCD))
247 |     
248 |     def test_ascii_and_bytes_encoding(self):
249 |         bytes = "\xab\xcd\xef"
250 |         bytes += "\x61\x62\x63\x64\x65" # abcde
251 |         
252 |         gen = gen_encode_unichars_from_bytes(bytes)
253 |         
254 |         (u1, u2) = gen.next()
255 |         (u3) = gen.next()
256 |         (u4) = gen.next()
257 |         (u5) = gen.next()
258 |         
259 |         self.assertFalse(list(gen))        
260 | 
261 |         self.assertEqual(u1, unichr(U12b_start + 0xABC))
262 |         self.assertEqual(u2, unichr(U12b_start + 0xDEF))
263 |         self.assertEqual(u3, unichr_12a_from_two_ascii('a', 'b'))
264 |         self.assertEqual(u4, unichr_12a_from_two_ascii('c', 'd'))
265 |         self.assertEqual(u5, unichr_08_from_int(ord('e')))
266 |     
267 |     def test_ascii_and_bytes_decoding(self):
268 |         
269 |         u1 = unichr(U12b_start + 0xABC)
270 |         u2 = unichr(U12b_start + 0xDEF)
271 |         u3 = unichr_12a_from_two_ascii('a', 'b')
272 |         u4 = unichr_12a_from_two_ascii('c', 'd')
273 |         u5 = unichr_08_from_int(ord('e'))
274 |                 
275 |         s = u''
276 |         s += u1
277 |         s += u2
278 |         s += u3
279 |         s += u4
280 |         s += u5
281 |         
282 |         gen = gen_decode_bytes_from_string(s)
283 |                
284 |         (a,b,c) = gen.next()
285 |         (d,e) = gen.next()
286 |         (f,g) = gen.next()
287 |         h = gen.next()
288 |              
289 |         self.assertFalse(list(gen))        
290 |    
291 |         self.assertEqual(a, 0xAB)
292 |         self.assertEqual(b, 0xCD)
293 |         self.assertEqual(c, 0xEF)
294 |             
295 |     def test_repeats(self):
296 |     
297 |         l = [1,1,1,2,1]
298 |     
299 |         n = number_of_left_instances_from_index(l, 0)
300 |         
301 |         self.assertEqual(n, 3)
302 | 
303 |     def test_empty_string(self):
304 |     
305 |         bytes = ""
306 |         
307 |         gen = gen_encode_unichars_from_bytes(bytes)
308 |         
309 |         self.assertFalse(list(gen))        
310 | 
311 |     def test_one_char(self):
312 |     
313 |         bytes = "a"
314 |         
315 |         gen = gen_encode_unichars_from_bytes(bytes)
316 |         
317 |         u1 = gen.next()
318 | 
319 |         self.assertFalse(list(gen))        
320 |         
321 |         self.assertEqual(ord(u1), 0x0461)
322 | 
323 |     def test_repeats_2(self):
324 |     
325 |         bytes = "\xAB\xCD\xEF\xFF\xFF\xFF\xFF\x00"
326 |         
327 |         gen = gen_encode_unichars_from_bytes(bytes)
328 |         
329 |         (u1, u2) = gen.next()
330 |         (u3, u4) = gen.next()
331 |         u5 = gen.next()
332 | 
333 |         self.assertFalse(list(gen))        
334 |         
335 |         self.assertEqual(ord(u1), 0x58BC)
336 |         self.assertEqual(ord(u2), 0x5bEF)
337 |         self.assertEqual(ord(u3), 0x04FF)
338 |         self.assertEqual(ord(u4), 0x4E04)
339 |         self.assertEqual(ord(u5), 0x0400)
340 |     
341 |     def test_encode_macho_header(self):
342 |     
343 |         bytes = "\xCF\xFA\xED\xFE\x07\x00\x00\x01"
344 |         
345 |         gen = gen_encode_unichars_from_bytes(bytes)
346 |         
347 |         (u1, u2) = gen.next()
348 |         (u3, u4) = gen.next()
349 |         u5 = gen.next()
350 | 
351 |         self.assertFalse(list(gen))        
352 |         
353 |         self.assertEqual(ord(u1), 0x5AFF)
354 |         self.assertEqual(ord(u2), 0x58ED)
355 |         self.assertEqual(ord(u3), 0x5DE0)
356 |         self.assertEqual(ord(u4), 0x5500)
357 |         self.assertEqual(ord(u5), 0x5E01)
358 |     
359 |     def test_big_repeats_2000_minus_2(self):
360 | 
361 |         bytes = ["\xAA"] * (0x2000 - 2)
362 |         
363 |         gen = gen_encode_unichars_from_bytes(bytes)
364 |         
365 |         (u1, u2) = gen.next()
366 |         (u3, u4) = gen.next()
367 |         
368 |         self.assertFalse(list(gen))        
369 | 
370 |         self.assertEqual(ord(u1), 0x04AA)
371 |         self.assertEqual(ord(u2), 0x5DFF)
372 |         self.assertEqual(ord(u3), 0x04AA)
373 |         self.assertEqual(ord(u4), 0x5DFF)
374 |         
375 |     def test_big_repeats_2000(self):
376 |     
377 |         bytes = ["\xAA"] * 0x2000
378 |         
379 |         gen = gen_encode_unichars_from_bytes(bytes)
380 |         
381 |         (u1, u2) = gen.next()
382 |         (u3, u4) = gen.next()
383 |         u5 = gen.next()
384 |         u6 = gen.next()
385 | 
386 |         self.assertFalse(list(gen))        
387 | 
388 |         self.assertEqual(ord(u1), 0x04AA)
389 |         self.assertEqual(ord(u2), 0x5DFF)
390 |         self.assertEqual(ord(u3), 0x04AA)
391 |         self.assertEqual(ord(u4), 0x5DFF)
392 |         self.assertEqual(ord(u5), 0x04AA)
393 |         self.assertEqual(ord(u6), 0x04AA)
394 | 
395 |     def test_ascii_text_encoding_decoding(self):
396 |     
397 |         s = "if I'd listened everything that they said to me, took the time to bleed from all the tiny little arrows shot my way, I wouldn't be here! the ones who don't do anything are always the ones who try to put you down. I'm talking to you: hero time starts right now! time to shine!"
398 |         
399 |         encode_gen = gen_encode_unichars_from_bytes(s)
400 | 
401 |         e = [b for b in encode_gen]
402 | 
403 |         s2 = ''.join([chr(c) for chunk in gen_decode_bytes_from_string(e) for c in chunk])
404 |         
405 |         self.assertEqual(s, s2)
406 | 
407 |     def test_ascii_text_encoding_decoding_2(self):
408 |     
409 |         s = ''.join([chr(i) for i in range(32, 128)])
410 | 
411 |         encode_gen = gen_encode_unichars_from_bytes(s)
412 | 
413 |         e = [b for b in encode_gen]
414 | 
415 |         self.assertTrue(len(e) * 2 == len(s))
416 | 
417 |         s2 = ''.join([chr(c) for chunk in gen_decode_bytes_from_string(e) for c in chunk])
418 |         
419 |         self.assertEqual(s, s2)
420 | 
if __name__ == '__main__':
    # Run only the TestUnidata cases, with one line of output per test
    # (the plain alternative would be unittest.main()).
    loader = unittest.TestLoader()
    runner = unittest.TextTestRunner(verbosity=2)
    runner.run(loader.loadTestsFromTestCase(TestUnidata))
425 | 


--------------------------------------------------------------------------------
/python/unibinary.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # Nicolas Seriot, 2013-01-17
  3 | 
  4 | """
  5 | UniBinary, or "Base64 for Unicode".
  6 | 
Encodes and decodes data into printable Unicode characters.
  8 | 
  9 | 2 ASCII characters -> 1 unicode character
 10 | 3 arbitrary bytes  -> 2 unicode characters
 11 | [3, 0xFFF] repeats -> 2 unicode characters
 12 | 
 13 | The encoded text can be copied / pasted / posted on Twitter and stored as UTF-8 text files.
 14 | 
 15 | http://github.com/nst/UniBinary/
 16 | 
 17 | $ python unibinary.py -h
 18 | 
 19 | $ python unibinary.py -e /bin/date > /tmp/date.txt
 20 | $ file /tmp/date.txt 
 21 | /tmp/date.txt: UTF-8 Unicode text, with very long lines, with no line terminators
 22 | 
 23 | $ python unibinary.py -d /tmp/date.txt > /tmp/date
 24 | $ file /tmp/date
 25 | /tmp/date: Mach-O 64-bit executable x86_64
 26 | 
 27 | $ chmod +x /tmp/date
 28 | $ /tmp/date
 29 | Thu Jan 17 18:02:24 CET 2013
 30 | """
 31 | 
 32 | import struct
 33 | import sys
 34 | import codecs
 35 | import hashlib
 36 | import argparse
 37 | 
__author__ = "Nicolas Seriot"
__license__ = "BSD"

# http://docs.python.org/2/howto/unicode.html
# http://unicode.org/Public/UNIDATA/Blocks.txt

# U12a: four ranges of U12a_length code points each; one code point encodes a
# pair of ASCII bytes. unichr_12a_from_two_ascii picks the range according to
# whether each byte is >= 64 and stores the remaining 6 bits of each byte in
# the 12-bit offset within that range.
# encodes ascii characters (7 bits)
U12a_0_0_start = 0x5E00 # CJK Unified Ideographs (subset) - encodes 12 bits (2 ascii) - MSB 0,0
U12a_0_1_start = 0x6E00 # CJK Unified Ideographs (subset) - encodes 12 bits (2 ascii) - MSB 0,1
U12a_1_0_start = 0x7E00 # CJK Unified Ideographs (subset) - encodes 12 bits (2 ascii) - MSB 1,0
U12a_1_1_start = 0x8E00 # CJK Unified Ideographs (subset) - encodes 12 bits (2 ascii) - MSB 1,1
U12a_length = 0x1000

# U12b carries an arbitrary 12-bit value (half of a 3-byte group, or the count
# of a repeat pair); U8 carries a single byte value.
# encodes arbitrary bits
U12b_start = 0x4E00 # CJK Unified Ideographs (subset) - encodes 12 bits
U12b_length = 0x1000
U8_start = 0x0400   # Cyrillic                        - encodes 8 bits
U8_length = 0x100  
 56 | 
 57 | def two_unichr_to_repeat_byte_ntimes(b, n):
 58 |     if n > 0xFFF:
 59 |         raise ValueError
 60 |     
 61 |     if b > 0xFF:
 62 |         raise ValueError
 63 |     
 64 |     uni_b = unichr(U8_start + b)
 65 |     uni_r = unichr(U12b_start + n)
 66 |     
 67 |     return (uni_b, uni_r)
 68 | 
 69 | def unichr_12a_from_two_ascii(a1, a2):
 70 |     
 71 |     i1 = ord(a1)
 72 |     i2 = ord(a2)
 73 | 
 74 |     unicode_start = None
 75 | 
 76 |     if i1 < 64 and i2 < 64:
 77 |         unicode_start = U12a_0_0_start
 78 |     elif i1 < 64 and i2 >= 64:
 79 |         i2 -= 64
 80 |         unicode_start = U12a_0_1_start
 81 |     elif i1 >= 64 and i2 < 64:
 82 |         i1 -= 64
 83 |         unicode_start = U12a_1_0_start
 84 |     elif i1 >= 64 and i2 >= 64:
 85 |         i1 -= 64
 86 |         i2 -= 64
 87 |         unicode_start = U12a_1_1_start
 88 |     
 89 |     return unichr(unicode_start + (i1 << 6) + i2)
 90 | 
 91 | def unichr_08_from_int(i):
 92 |     if i > (U8_start + U8_length):
 93 |         print "-- unichr_08_from_int: 0x%x" % i
 94 |         raise ValueError
 95 |     
 96 |     return unichr(U8_start + i)
 97 | 
 98 | def unichr_12_from_int(i):
 99 |     if i > (U12b_start + U12b_length):
100 |         print "-- unichr_12_from_int: 0x%x" % i
101 |         raise ValueError
102 |     
103 |     return unichr(U12b_start + i)
104 | 
def int_from_u12a(u):
    """Return the 12-bit offset of `u` inside the U12a range that contains it.

    The previous body referenced a single `U12a_start` constant, while the
    module defines four U12a_x_y_start ranges; all four are checked here.

    Raises ValueError when `u` is in none of the U12a ranges.
    """
    i = ord(u)

    for start in (U12a_0_0_start, U12a_0_1_start, U12a_1_0_start, U12a_1_1_start):
        if start <= i < (start + U12a_length):
            return i - start

    raise ValueError("int_from_u12a: 0x%x is outside the U12a ranges" % i)
114 | 
def int_from_u08b(u):
    """Return the byte value encoded by the U8 character `u`.

    Raises ValueError when `u` is outside [U8_start, U8_start + U8_length).
    The upper bound is exclusive: the old `>` comparison let U8_start +
    U8_length through and returned 0x100, which is not a byte. The diagnostic
    now travels with the exception instead of being printed to stdout.
    """
    i = ord(u)

    if i < U8_start or i >= (U8_start + U8_length):
        raise ValueError("int_from_u08b: 0x%x is outside the U8 range" % i)

    return i - U8_start
124 | 
def two_bytes_from_u12a(u):
    """Unpack a U12a character into the two ASCII byte values it encodes.

    Returns (b0, b1) as ints. Raises ValueError when `u` is in none of the
    U12a ranges; the diagnostic is carried by the exception rather than
    printed to stdout, which the command line uses for decoded data.
    """
    i = ord(u)

    unicode_start = None
    for start in (U12a_0_0_start, U12a_0_1_start, U12a_1_0_start, U12a_1_1_start):
        if start <= i < (start + U12a_length):
            unicode_start = start
            break

    if unicode_start is None:
        raise ValueError("two_bytes_from_u12a: ord=0x%02x is outside the U12a ranges" % i)

    # 12-bit offset inside the range: high 6 bits -> first byte, low 6 -> second
    value = i - unicode_start
    b0 = (value & 0xFC0) >> 6
    b1 = value & 0x3F

    # the range identifies which of the two bytes had 64 subtracted when encoding
    if unicode_start in (U12a_1_0_start, U12a_1_1_start):
        b0 += 64
    if unicode_start in (U12a_0_1_start, U12a_1_1_start):
        b1 += 64

    return (b0, b1)
155 | 
def int_from_u12b(u):
    """Return the 12-bit value encoded by the U12b character `u`.

    Raises ValueError when `u` is outside [U12b_start, U12b_start + U12b_length).
    The upper bound is exclusive: the old `>` comparison let U12b_start +
    U12b_length through and returned 0x1000, which does not fit in 12 bits.
    The diagnostic now travels with the exception instead of being printed to
    stdout.
    """
    i = ord(u)

    if i < U12b_start or i >= (U12b_start + U12b_length):
        raise ValueError("int_from_u12b: 0x%x is outside the U12b range" % i)

    return i - U12b_start
165 | 
def two_twelve_bits_values_from_three_bytes(a, b, c):
    """Regroup three bytes into two 12-bit values.

    (0x12, 0x34, 0x56) -> (0x123, 0x456)

    Raises ValueError when any argument is greater than 0xFF.
    """
    if any(value > 0xFF for value in (a, b, c)):
        raise ValueError

    upper_nibble_of_b = b >> 4
    lower_nibble_of_b = b & 0xF

    return ((a << 4) + upper_nibble_of_b, (lower_nibble_of_b << 8) + c)
176 | 
def three_bytes_from_two_twelve_bits_values(i1, i2):
    """Regroup two 12-bit values into three bytes.

    (0x123, 0x456) -> (0x12, 0x34, 0x56)

    Raises ValueError when either argument is greater than 0xFFF.
    """
    if any(value > 0xFFF for value in (i1, i2)):
        raise ValueError

    first = i1 >> 4
    # the middle byte is the low nibble of i1 followed by the top nibble of i2
    middle = ((i1 & 0xF) << 4) + ((i2 & 0xF00) >> 8)
    last = i2 & 0x0FF

    return (first, middle, last)
188 |     
def number_of_left_instances_from_index(l, index):
    """Count how many consecutive items of `l`, starting at `index`, equal l[index].

    The result is at least 1. Raises IndexError when `index` is not a valid
    position in `l`.
    """
    target = l[index]
    end = len(l)

    pos = index
    while pos < end and l[pos] == target:
        pos += 1

    return pos - index
201 | 
def three_bytes_from_unichars(u1, u2):
    """Decode a pair of U12b characters into a tuple of three byte values.

    Each character is converted with int_from_u12b() and the two resulting
    values are repacked by three_bytes_from_two_twelve_bits_values().
    """
    first_value = int_from_u12b(u1)
    second_value = int_from_u12b(u2)
    return three_bytes_from_two_twelve_bits_values(first_value, second_value)
207 | 
def repeated_bytes_from_unichars(u1, u2):
    """Expand a (byte, count) character pair into a list of repeated values.

    u1 is converted with int_from_u08b() and gives the value to repeat;
    u2 is converted with int_from_u12b() and gives the repetition count.
    Returns a list holding the value `count` times.
    """
    byte_value = int_from_u08b(u1)
    repeat_count = int_from_u12b(u2)
    return [byte_value] * repeat_count
212 | 
def two_bytes_from_unichars(u1, u2):
    """Decode two characters with int_from_u08b() and return both values as a tuple."""
    first = int_from_u08b(u1)
    second = int_from_u08b(u2)
    return (first, second)
217 | 
def is_in_U8a(u):
    """Tell whether u's code point is in [U8a_start, U8a_start + U8a_length)."""
    return U8a_start <= ord(u) < (U8a_start + U8a_length)
221 | 
def is_in_U12a(u):
    """Tell whether u's code point falls inside any of the four U12a blocks.

    Each block is the half-open range [start, start + U12a_length) for one
    of the four U12a_*_start constants.
    """
    code_point = ord(u)
    block_starts = (U12a_0_0_start, U12a_0_1_start, U12a_1_0_start, U12a_1_1_start)

    return any(start <= code_point < (start + U12a_length) for start in block_starts)
230 | 
def is_in_U8b(u):
    """Tell whether u's code point is in [U8_start, U8_start + U8_length)."""
    # NOTE(review): this reads U8_start / U8_length whereas is_in_U8a reads
    # U8a_start / U8a_length; the constants are defined outside this section,
    # so confirm these are the intended names.
    return U8_start <= ord(u) < (U8_start + U8_length)
234 | 
def is_in_U12b(u):
    """Tell whether u's code point is in [U12b_start, U12b_start + U12b_length)."""
    return U12b_start <= ord(u) < (U12b_start + U12b_length)
238 | 
def bytes_from_u1_u2(u1, u2):
    """Decode a pair of characters according to the blocks they belong to.

    (U12b, U12b) -> three_bytes_from_unichars()
    (U8b,  U12b) -> repeated_bytes_from_unichars()
    (U8b,  U8b)  -> two_bytes_from_unichars()

    Any other combination prints a diagnostic line and raises ValueError.
    """

    # All four membership tests are evaluated up front, in this order.
    first_is_u12, second_is_u12 = is_in_U12b(u1), is_in_U12b(u2)
    first_is_u8, second_is_u8 = is_in_U8b(u1), is_in_U8b(u2)

    if first_is_u12 and second_is_u12:
        return three_bytes_from_unichars(u1, u2)

    if first_is_u8:
        if second_is_u12:
            return repeated_bytes_from_unichars(u1, u2)
        if second_is_u8:
            return two_bytes_from_unichars(u1, u2)

    print("-- %c %c 0x%x 0x%x" % (u1, u2, ord(u1), ord(u2)))
    raise ValueError
256 | 
def gen_encode_unichars_from_bytes(bytes):
    """Generate the encoded characters for a byte string, chunk by chunk.

    bytes -- the data to encode; each item must be accepted by ord() and
             slices must be accepted by struct.unpack().

    At every position the first matching rule is applied:

    * a run of 3 or more identical bytes (at most 0xFFF consumed at once)
      -> the pair returned by two_unichr_to_repeat_byte_ntimes()
    * two bytes that are both < 128
      -> the result of unichr_12a_from_two_ascii()
    * at least three bytes left
      -> a pair of unichr_12_from_int() characters
    * otherwise a single byte
      -> the result of unichr_08_from_int()

    Each yielded item is passed to ''.join() by the command-line code below.
    """

    max_run = 0xFFF
    total = len(bytes)
    i = 0

    while i < total:
        # Measure the run of identical bytes starting at i, but never look
        # further than max_run items: a longer run is emitted in several
        # chunks anyway, and rescanning the whole remainder each time would
        # make long runs quadratic.
        current = bytes[i]
        scan_end = min(total, i + max_run)
        j = i + 1
        while j < scan_end and bytes[j] == current:
            j += 1
        r = j - i

        if r >= 3:
            # read N bytes | N >= 3 and N < 0x1000, encode as 2 unichar
            (uni_b, uni_n) = two_unichr_to_repeat_byte_ntimes(ord(current), r)
            i += r
            yield (uni_b, uni_n)
            continue

        two_ascii_chars_available = total >= i+2 and ord(bytes[i]) < 128 and ord(bytes[i+1]) < 128

        if two_ascii_chars_available:
            # read 2 x 7 bits, encode 1 unichar
            (a1, a2) = bytes[i:i+2]
            i += 2
            yield unichr_12a_from_two_ascii(a1, a2)

        elif total >= i+3:
            # read 3 bytes, encode 2 unichars
            (a, b, c) = struct.unpack("BBB", bytes[i:i+3])
            i += 3

            (s1, s2) = two_twelve_bits_values_from_three_bytes(a, b, c)
            yield (unichr_12_from_int(s1), unichr_12_from_int(s2))

        else:
            # read 1 byte, encode 1 unichar
            (single,) = struct.unpack("B", bytes[i])
            i += 1

            yield (unichr_08_from_int(single))
303 | 
304 | def gen_decode_bytes_from_string(s):
305 |     
306 |     i = 0
307 |         
308 |     while (i < len(s)):
309 |         
310 |         if s[i] == '\n':
311 |             i += 1
312 |             continue
313 |         
314 |         if is_in_U12a(s[i]):
315 |             # 1 U12a -> read 2 ascii characters
316 |             bytes = two_bytes_from_u12a(s[i])
317 |             i += 1
318 |             yield bytes
319 |         elif i+1 < len(s):
320 |             # (U12b, U12b) -> read 3 bytes
321 |             # (U8b, U12b) -> read repetition
322 |             # (U8b, U8b) -> read 1 byte, 1 byte
323 |             u1 = s[i]
324 |             i += 1
325 |             
326 |             while s[i] == '\n':
327 |                 i += 1
328 |             
329 |             u2 = s[i]
330 |             i += 1
331 |             
332 |             bytes = bytes_from_u1_u2(u1, u2)
333 |             yield bytes
334 |         elif is_in_U8b(s[i]):
335 |             # 1 U8b -> read 1 byte
336 |             b = int_from_u08b(s[i])
337 |             i += 1
338 |             yield tuple([b])
339 |         else:
340 |             print "-- cannot decode", s
341 |             sys.exit(1)
342 |             
343 | # main
344 | 
def print_decoded_string(s):
    """Decode s and write the resulting bytes to standard output.

    Every integer produced by gen_decode_bytes_from_string() is packed as one
    unsigned byte and written to sys.stdout individually.
    """

    for chunk in gen_decode_bytes_from_string(s):
        for byte_value in chunk:
            sys.stdout.write(struct.pack("B", byte_value))
351 | 
if __name__ == '__main__':

    # Command-line entry point: exactly one of the four modes is handled,
    # checked in the order encode, decode, encode_string, decode_string.
    # Without any of them the usage text is printed.
    parser = argparse.ArgumentParser(description='UniBinary encodes and decodes data into printable Unicode characters.')
    parser.add_argument('-e','--encode', help='file to encode')
    parser.add_argument('-d','--decode', help='file to decode')
    parser.add_argument('-es','--encode_string', help='utf-8 string to encode')
    parser.add_argument('-ds','--decode_string', help='utf-8 string to decode')
    args = vars(parser.parse_args())

    if args['encode']:
        # `with` closes the input file even when read() raises.
        with open(args['encode'], "rb") as f:
            data = f.read()

        # Encoded characters are written to stdout as UTF-8.
        UTF8Writer = codecs.getwriter('utf-8')
        sys.stdout = UTF8Writer(sys.stdout)

        for unichars in gen_encode_unichars_from_bytes(data):
            string = unicode(''.join(unichars))
            sys.stdout.write(string)

        sys.stdout.flush()
        # The trailing newline goes to stderr so it is not part of the output.
        sys.stderr.write('\n')

    elif args['decode']:
        # `with` closes the input file even when read() or decoding raises.
        with codecs.open(args['decode'], "r", encoding='utf-8') as f:
            s = f.read()

        print_decoded_string(s)

        sys.stdout.flush()
        sys.stderr.write('\n')

    elif args['encode_string']:
        UTF8Writer = codecs.getwriter('utf-8')
        sys.stdout = UTF8Writer(sys.stdout)

        for unichars in gen_encode_unichars_from_bytes(args['encode_string']):
            string = unicode(''.join(unichars))
            sys.stdout.write(string)

        sys.stdout.flush()
        sys.stderr.write('\n')

    elif args['decode_string']:
        print_decoded_string(args['decode_string'].decode('utf-8'))

        sys.stdout.flush()
        sys.stderr.write('\n')

    else:
        parser.print_help()
405 | 


--------------------------------------------------------------------------------