├── .gitignore
├── .travis.yml
├── Makefile
├── README.rst
├── common_tests
    ├── ux.cover
    └── ux_SUITE.erl
├── ct-run.sh
├── doc
    ├── README.md
    ├── edoc-info
    ├── erlang.png
    ├── stylesheet.css
    ├── uca
    │   ├── package-summary.md
    │   ├── ux_uca_compress.md
    │   ├── ux_uca_decomp.md
    │   ├── ux_uca_extract.md
    │   ├── ux_uca_options.md
    │   ├── ux_uca_sort_key_binary.md
    │   ├── ux_uca_sort_key_binary_cs.md
    │   ├── ux_uca_sort_key_list.md
    │   ├── ux_uca_sort_key_uncompressed.md
    │   ├── ux_uca_testdata.md
    │   └── ux_uca_utils.md
    ├── unidata
    │   └── package-summary.md
    ├── utils
    │   ├── package-summary.md
    │   ├── ux_opt_ranges.md
    │   └── ux_ranges.md
    ├── ux_char.md
    ├── ux_gb.md
    ├── ux_string.md
    ├── ux_types.md
    ├── ux_uca.md
    └── ux_wb.md
├── priv
    ├── UCA
    │   ├── CollationAuxiliary
    │   │   └── allkeys_CLDR.txt.gz
    │   └── allkeys.txt.gz
    └── UNIDATA
    │   ├── Blocks.txt
    │   ├── CompositionExclusions.txt
    │   ├── DerivedNormalizationProps.txt.gz
    │   ├── Scripts.txt
    │   ├── UnicodeData.txt.gz
    │   └── auxiliary
    │       ├── GraphemeBreakProperty.txt.gz
    │       ├── GraphemeBreakTest.txt.gz
    │       ├── WordBreakProperty.txt.gz
    │       └── WordBreakTest.txt.gz
├── rebar
├── rebar.config
├── src
    ├── uca
    │   ├── ux.hrl
    │   ├── ux_uca.hrl
    │   ├── ux_uca_alt.erl
    │   ├── ux_uca_compress.erl
    │   ├── ux_uca_decomp.erl
    │   ├── ux_uca_extract.erl
    │   ├── ux_uca_options.erl
    │   ├── ux_uca_sort_key_binary.erl
    │   ├── ux_uca_sort_key_binary_cs.erl
    │   ├── ux_uca_sort_key_list.erl
    │   ├── ux_uca_sort_key_uncompressed.erl
    │   ├── ux_uca_testdata.erl
    │   └── ux_uca_utils.erl
    ├── unidata
    │   ├── ux.hrl
    │   ├── ux_unidata_filelist.erl
    │   ├── ux_unidata_parser.erl
    │   ├── ux_unidata_parser_allkeys.erl
    │   ├── ux_unidata_parser_blocks.erl
    │   ├── ux_unidata_parser_comp_exclusions.erl
    │   ├── ux_unidata_parser_grapheme_break_property.erl
    │   ├── ux_unidata_parser_norm_props.erl
    │   ├── ux_unidata_parser_scripts.erl
    │   ├── ux_unidata_parser_unidata.erl
    │   ├── ux_unidata_parser_word_break_property.erl
    │   ├── ux_unidata_server.erl
    │   ├── ux_unidata_store.erl
    │   └── ux_unidata_store_sup.erl
    ├── utils
    │   ├── ux_opt_ranges.erl
    │   └── ux_ranges.erl
    ├── ux.app.src
    ├── ux.erl
    ├── ux.hrl
    ├── ux_app.erl
    ├── ux_char.erl
    ├── ux_deps.erl
    ├── ux_gb.erl
    ├── ux_string.erl
    ├── ux_sup.erl
    ├── ux_types.erl
    ├── ux_uca.erl
    ├── ux_unidata.erl
    └── ux_wb.erl
├── start-dev.sh
├── test
    ├── ux_break_tests.erl
    ├── ux_char_tests.erl
    ├── ux_string_tests.erl
    ├── ux_tests.hrl
    └── ux_uca_tests.erl
├── testing
    ├── UCA
    │   └── CollationTest
    │   │   ├── CollationTest_NON_IGNORABLE_SHORT.txt.gz
    │   │   └── CollationTest_SHIFTED_SHORT.txt.gz
    └── UNIDATA
    │   ├── NormalizationTest.txt.gz
    │   └── auxiliary
    │       ├── GraphemeBreakTest.txt
    │       └── WordBreakTest.txt
├── ux_test.cfg
└── ux_test.spec


/.gitignore:
--------------------------------------------------------------------------------
 1 | *.swp
 2 | *.beam
 3 | *.trace
 4 | _test
 5 | *.dump
 6 | deps/*
 7 | ebin/
 8 | .eunit
 9 | log
10 | logs
11 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: erlang
 2 | branches:
 3 |   only:
 4 |     - master
 5 | notifications:
 6 |   email: arcusfelis@gmail.com
 7 |   irc: 
 8 | otp_release:  # Travis builds once per listed OTP release; `env` would only define variables
 9 |   - R15B01
10 |   - R15B
11 |   - R14B04
12 |   - R14B03
13 |   - R14B02
14 |   - R14B01
15 |   - R14A
16 |   - R13B04
17 | 
18 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | 
 2 | PREFIX:=../
 3 | DEST:=$(PREFIX)$(PROJECT)
 4 | 
 5 | REBAR=./rebar
 6 | 
 7 | all:
 8 | 	@$(REBAR) get-deps compile
 9 | 
10 | edoc:
11 | 	@$(REBAR) skip_deps=true doc
12 | 
13 | eunit:
14 | 	@$(REBAR) skip_deps=true eunit
15 | 
16 | clean:
17 | 	@$(REBAR) clean
18 | 
19 | build_plt:
20 | 	@$(REBAR) build-plt
21 | 
22 | check_plt:
23 | 	@$(REBAR) check-plt
24 | 
25 | dialyzer:
26 | 	@$(REBAR) dialyze
27 | 
28 | app:
29 | 	@$(REBAR) create template=mochiwebapp dest=$(DEST) appid=$(PROJECT)
30 | 
31 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
  1 | ; -- Mode: Markdown; -- ; vim: filetype=markdown tw=76 expandtab
  2 | shiftwidth=4 tabstop=4
  3 | 
  4 | Unicode eXtension
  5 | =================
  6 | 
  7 | **License**: `Apache License, Version
  8 | 2.0 <http://www.apache.org/licenses/LICENSE-2.0.html>`_
  9 | 
 10 | **Alternative license**:
 11 | `LGPLv3 <http://www.gnu.org/licenses/lgpl-3.0.html>`_
 12 | 
 13 | **Author**: Uvarov Michael (arcusfelis@gmail.com) 
 14 | 
 15 | **Unidata version**: 6.1.0
 16 | 
 17 | `Read edoc documentation <https://github.com/erlang-unicode/ux/blob/master/doc/README.md>`_
 18 | 
 19 | Module for working with strings. A string is a flat list of Unicode
 20 | characters.
 21 | 
 22 | All actions with Unicode were described in the `Unicode
 23 | Standards <http://www.unicode.org/reports/>`_.
 24 | 
 25 | .. image:: https://secure.travis-ci.org/erlang-unicode/ux.png?branch=master
 26 |     :alt: Build Status
 27 |     :target: http://travis-ci.org/erlang-unicode/ux
 28 | 
 29 | 
 30 | This library implements only these documents:
 31 | ---------------------------------------------
 32 | 
 33 | -  `UAX 15 <http://www.unicode.org/reports/tr15/>`_ Unicode
 34 |    Normalization Forms
 35 | -  `UTS 10 <http://www.unicode.org/reports/tr10/>`_ Unicode Collation
 36 |    Algorithm
 37 | 
 38 | and some parts from:
 39 | --------------------
 40 | 
 41 | -  `UAX 44 <http://www.unicode.org/reports/tr44/>`_ Unicode Character
 42 |    Database
 43 | 
 44 | Structure of the library
 45 | ========================
 46 | 
 47 | ``ux_string`` uses ``ux_char`` and ``ux_unidata``.
 48 | 
 49 | ``ux_uca`` uses ``ux_char`` and ``ux_unidata``.
 50 | 
 51 | ``ux_char`` uses ``ux_unidata``.
 52 | 
 53 | ``ux_unidata`` is for an internal data access.
 54 | 
 55 | ux_string.erl: String Functions for lists of Unicode characters.
 56 | ================================================================
 57 | 
 58 | This module provides the functions for operations with
 59 | `UNIDATA <http://www.ksu.ru/eng/departments/ktk/test/perl/lib/unicode/UCDFF301.html>`_.
 60 | UNIDATA contains data about Unicode characters.
 61 | 
 62 | Functions for working with Unicode Normal Forms (UNF)
 63 | -----------------------------------------------------
 64 | 
 65 | -  to_nfc/1
 66 | -  to_nfd/1
 67 | -  to_nfkd/1
 68 | -  to_nfkc/1
 69 | -  is_nfc/1
 70 | -  is_nfd/1
 71 | -  is_nfkc/1
 72 | -  is_nfkd/1
 73 | 
 74 | Functions from stdlib for Unicode strings
 75 | -----------------------------------------
 76 | 
 77 | -  to_lower/1
 78 | -  to_upper/1
 79 | 
 80 | Functions for processing strings as groups of graphemes
 81 | -------------------------------------------------------
 82 | 
 83 | Grapheme is a letter with its modifiers. 
 84 | -  length/1 
 85 | -  reverse/1
 86 | -  first/2
 87 | -  last/2
 88 | 
 89 | Examples
 90 | --------
 91 | 
 92 | Code:
 93 | 
 94 | .. code-block:: erlang
 95 | 
 96 |     (ux@delta)11> ux_string:length("FF g̈").
 97 |     4
 98 |     (ux@delta)12> string:len("FF g̈").       
 99 |     5
100 |     (ux@delta)13> ux_string:to_graphemes("FF g̈").
101 |     ["F","F"," ",[103,776]]
102 | 
103 | "PHP-style" string functions
104 | ----------------------------
105 | 
106 | -  explode/2,3
107 | -  html_special_chars/1 (htmlspecialchars in php)
108 | -  strip_tags/1,2
109 | 
110 | Examples
111 | ~~~~~~~~
112 | 
113 | Code:
114 | 
115 | .. code-block:: erlang
116 | 
117 |     ux_string:explode(["==", "++", "|"], "+++-+=|==|==|=+-+++").
118 | 
119 | Result:
120 | 
121 | .. code-block:: erlang
122 | 
123 |     [[],"+-+=",[],[],[],[],"=+-","+"]
124 | 
125 | Code:
126 | 
127 | .. code-block:: erlang
128 | 
129 |     ux_string:strip_tags("<b>bold text</b>").
130 | 
131 | Result:
132 | 
133 | .. code-block:: erlang
134 | 
135 |     "bold text"
136 | 
137 | Types function
138 | --------------
139 | 
140 | Type is a General Category in Unicode.
141 | 
142 | Code:
143 | 
144 | .. code-block:: erlang
145 | 
146 |     Str = "Erlang created the field of telephone
147 |     networks analysis. His early work in scrutinizing the use of local, exchange
148 |     and trunk telephone line usage in a small community, to understand the
149 |     theoretical requirements of an efficient network led to the creation of the
150 |     Erlang formula, which became a foundational element of present day
151 |     telecommunication network studies.",
152 |     ux_string:explode_types(['Zs', 'Lu'], Str).
153 | 
154 | Result:
155 | 
156 | .. code-block:: erlang
157 | 
158 |     [[],"rlang","created","the","field","of","telephone",
159 |      "networks","analysis.",[],"is","early","work","in",
160 |      "scrutinizing","the","use","of","local,","exchange","and",
161 |      "trunk","telephone","line","usage","in","a","small",
162 |      [...]|...]
163 | 
164 | Code:
165 | 
166 | .. code-block:: erlang
167 | 
168 |     ux_string:types(Str).
169 | 
170 | Result:
171 | 
172 | .. code-block:: erlang
173 | 
174 |     ['Lu','Ll','Ll','Ll','Ll','Ll','Zs','Ll','Ll','Ll','Ll',
175 |      'Ll','Ll','Ll','Zs','Ll','Ll','Ll','Zs','Ll','Ll','Ll','Ll',
176 |      'Ll','Zs','Ll','Ll','Zs','Ll'...]
177 | 
178 | Where atom ``'Lu'`` is Letter, Uppercase; ``'Ll'`` is Letter, Lowercase. Read
179 | more about types in the description of ``ux_char:type/1``.
180 | 
181 | Code:
182 | 
183 | .. code-block:: erlang
184 | 
185 |     ux_string:delete_types(['Ll'], Str).
186 | 
187 | Result:
188 | 
189 | .. code-block:: erlang
190 | 
191 |     "E       . H        ,          ,                E ,           ."
192 | 
193 | ux_char.erl: Char Functions
194 | ===========================
195 | 
196 | Code:
197 | 
198 | .. code-block:: erlang
199 | 
200 |     ux_char:type($ ).
201 | 
202 | Result:
203 | 
204 | .. code-block:: erlang
205 | 
206 |     'Zs'
207 | 
208 | `List of types <http://www.ksu.ru/eng/departments/ktk/test/perl/lib/unicode/UCDFF301.html#General%20Category>`_
209 | ---------------------------------------------------------------------------------------------------------------
210 | 
211 | -  Normative Categories:
212 | 
213 |    -  Lu Letter, Uppercase
214 |    -  Ll Letter, Lowercase
215 |    -  Lt Letter, Titlecase
216 |    -  Mn Mark, Non-Spacing
217 |    -  Mc Mark, Spacing Combining
218 |    -  Me Mark, Enclosing
219 |    -  Nd Number, Decimal Digit
220 |    -  Nl Number, Letter
221 |    -  No Number, Other
222 |    -  Zs Separator, Space
223 |    -  Zl Separator, Line
224 |    -  Zp Separator, Paragraph
225 |    -  Cc Other, Control
226 |    -  Cf Other, Format
227 |    -  Cs Other, Surrogate
228 |    -  Co Other, Private Use
229 |    -  Cn Other, Not Assigned (no characters in the file have this
230 |       property)
231 | 
232 | -  Informative Categories:
233 | 
234 |    -  Lm Letter, Modifier
235 |    -  Lo Letter, Other
236 |    -  Pc Punctuation, Connector
237 |    -  Pd Punctuation, Dash
238 |    -  Ps Punctuation, Open
239 |    -  Pe Punctuation, Close
240 |    -  Pi Punctuation, Initial quote (may behave like Ps or Pe depending
241 |       on usage)
242 |    -  Pf Punctuation, Final quote (may behave like Ps or Pe depending on
243 |       usage)
244 |    -  Po Punctuation, Other
245 |    -  Sm Symbol, Math
246 |    -  Sc Symbol, Currency
247 |    -  Sk Symbol, Modifier
248 |    -  So Symbol, Other
249 | 
250 | ux_uca.erl: Unicode Collation Algorithm
251 | =======================================
252 | 
253 | See `Unicode Technical Standard #10 <http://unicode.org/reports/tr10/>`_.
254 | 
255 | Functions
256 | ---------
257 | 
258 | -  compare/2,3
259 | -  sort/1,2
260 | -  sort_key/1,2
261 | -  sort_array/1,2
262 | -  search/2,3,4
263 | 
264 | Examples
265 | --------
266 | 
267 | Code from erlang shell:
268 | 
269 | .. code-block:: erlang
270 | 
271 |     1> ux_uca:sort_key("a").   
272 |     <<21,163,0,0,32,0,0,2,0,0,255,255>>
273 | 
274 |     2> ux_uca:sort_key("abc"). 
275 |     <<21,163,21,185,21,209,0,0,34,0,0,4,0,0,255,255,255,255,
276 |       255,255>>
277 | 
278 |     3> ux_uca:sort_key("abcd").
279 |     <<21,163,21,185,21,209,21,228,0,0,35,0,0,5,0,0,255,255,
280 |       255,255,255,255,255,255>>
281 | 
282 | Code:
283 | 
284 | .. code-block:: erlang
285 | 
286 |     ux_uca:compare("a", "a").
287 |     ux_uca:compare("a", "b").
288 |     ux_uca:compare("c", "b").
289 | 
290 | Result:
291 | 
292 | ::
293 | 
294 |     equal
295 |     lower
296 |     greater
297 | 
298 | Code:
299 | 
300 | .. code-block:: erlang
301 | 
302 |     Options = ux_uca_options:get_options([ 
303 |             {natural_sort, false}, 
304 |             {strength, 3}, 
305 |             {alternate, shifted} 
306 |         ]),
307 |     InStrings = ["erlang", "esl", "nitrogen", "epm", "mochiweb", "rebar", "eunit"],
308 |     OutStrings = ux_uca:sort(Options, InStrings),
309 |     [io:format("~ts~n", [S]) || S <- OutStrings],
310 | 
311 |     SortKeys = [{Str, ux_uca:sort_key(Options, Str)} || Str <- OutStrings],
312 |     [io:format("~ts ~w~n", [S, K]) || {S, K} <- SortKeys],
313 | 
314 |     ok.
315 | 
316 | Result:
317 | 
318 | ::
319 | 
320 |     epm
321 |     erlang
322 |     esl
323 |     eunit
324 |     mochiweb
325 |     nitrogen
326 |     rebar
327 |     epm [5631,5961,5876,0,32,32,32,0,2,2,2]
328 |     erlang [5631,6000,5828,5539,5890,5700,0,32,32,32,32,32,32,0,2,2,2,2,2,2]
329 |     esl [5631,6054,5828,0,32,32,32,0,2,2,2]
330 |     eunit [5631,6121,5890,5760,6089,0,32,32,32,32,32,0,2,2,2,2,2]
331 |     mochiweb [5876,5924,5585,5735,5760,6180,5631,5561,0,32,32,32,32,32,32,32,32,0,2,2,2,2,2,2,2,2]
332 |     nitrogen [5890,5760,6089,6000,5924,5700,5631,5890,0,32,32,32,32,32,32,32,32,0,2,2,2,2,2,2,2,2]
333 |     rebar [6000,5631,5561,5539,6000,0,32,32,32,32,32,0,2,2,2,2,2]
334 |     ok
335 | 
336 | Searching
337 | ---------
338 | 
339 | Code:
340 | 
341 | .. code-block:: erlang
342 | 
343 |     (ux@delta)30> ux_uca:search("The quick brown fox jumps over the lazy dog.",
344 |     "fox").
345 |     {"The quick brown ","fox"," jumps over the lazy dog."}
346 | 
347 |     (ux@delta)33> ux_uca:search("The quick brown fox jumps over the lazy dog.",
348 |     "cat").         
349 |     false
350 | 
351 | Searching and Strength
352 | ----------------------
353 | 
354 | Code:
355 | 
356 | .. code-block:: erlang
357 | 
358 |     (ux@delta)20> CF = fun(S) -> ux_uca_options:get_options([{strength,S}]) end.      
359 |     #Fun<erl_eval.6.80247286>
360 | 
361 |     (ux@delta)32> ux_uca:search(CF(2), "The quick brown fox jumps over the lazy
362 |     dog.", "dog", maximal).
363 |     {"The quick brown fox jumps over the lazy"," dog.",[]}
364 | 
365 |     (ux@delta)21> ux_uca:search(CF(2), "fF", "F").                                    
366 |     {[],"f","F"}
367 | 
368 |     (ux@delta)22> ux_uca:search(CF(3), "fF", "F").
369 |     {"f","F",[]}
370 | 
371 | Searching and Match-Style
372 | -------------------------
373 | 
374 | Code:
375 | 
376 | .. code-block:: erlang
377 | 
378 |     (ux@delta)20> CF = fun(S) -> ux_uca_options:get_options([{strength,S}]) end.      
379 |     #Fun<erl_eval.6.80247286>
380 | 
381 |     (ux@delta)27> ux_uca:search(CF(3), "! F   ?S?", "! F !", 'minimal').
382 |     {"! ","F","   ?S?"}
383 | 
384 |     (ux@delta)28> ux_uca:search(CF(3), "! F   ?S?", "! F !", 'maximal').
385 |     {[],"! F   ?","S?"}
386 | 
387 |     (ux@delta)29> ux_uca:search(CF(3), "! F   ?S?", "! F !", 'medium'). 
388 |     {[],"! F ","  ?S?"}
389 | 
390 | ux_unidata.erl
391 | ==============
392 | 
393 | Stores UNIDATA information. For internal use only.
394 | 
395 | Data loading
396 | ============
397 | 
398 | .. code-block:: erlang
399 | 
400 |     ux_unidata_filelist:set_source(Level, ParserType, ImportedDataTypes,
401 |     FromFile).
402 | 
403 | For example:
404 | 
405 | .. code-block:: erlang
406 | 
407 |     ux_unidata_filelist:set_source(process, blocks, all, code:priv_dir(ux) ++ "/UNIDATA/Blocks.txt").
408 | 
409 | loads data about Unicode blocks from ``priv/UNIDATA/Blocks.txt``.
410 | 
411 | So, different processes can use their own unidata dictionaries.
412 | 
413 | Level is ``process``, ``application`` or ``node``.
414 | 
415 | Parsers are located in ``ux_unidata_parser_*`` modules.
416 | 
417 | Default unidata files are loaded when the application tries to access
418 | them.
419 | 


--------------------------------------------------------------------------------
/common_tests/ux.cover:
--------------------------------------------------------------------------------
1 | {incl_app, ux, details}.
2 | 


--------------------------------------------------------------------------------
/common_tests/ux_SUITE.erl:
--------------------------------------------------------------------------------
 1 | -module(ux_SUITE).
 2 | 
 3 | -include_lib("common_test/include/ct.hrl").
 4 | -include_lib("eunit/include/eunit.hrl").
 5 | 
 6 | -export([suite/0, all/0, groups/0,
 7 | 	 init_per_group/2, end_per_group/2,
 8 | 	 init_per_suite/1, end_per_suite/1,
 9 | 	 init_per_testcase/2, end_per_testcase/2]).
10 | 
11 | -export([
12 |         ux_load_case/0,
13 |         ux_load_case/1,
14 |         ux_race_cond_case/0,
15 |         ux_race_cond_case/1
16 | ]).
17 | 
18 | 
19 | suite() ->
20 |     [{timetrap, {minutes, 3}}].
21 | 
22 | %% Setup/Teardown
23 | %% ----------------------------------------------------------------------
24 | init_per_group(main_group, Config) ->
25 |     init_locations(Config);
26 | init_per_group(_Group, Config) ->
27 |     Config.
28 | 
29 | end_per_group(main_group, Config) ->
30 |     end_locations(Config);
31 | end_per_group(_Group, _Config) ->
32 |     ok.
33 | 
34 | init_per_suite(Config) ->
35 |     application:start(ux),
36 |     Config.
37 | 
38 | end_per_suite(Config) ->
39 |     ok.
40 | 
41 | end_locations(_Config) ->
42 |     ok.
43 | 
44 | init_locations(Config) ->
45 |     %% Setup locations that some of the test cases use
46 | %   DataDir = ?config(data_dir, Config),
47 |     Config.
48 | 
49 | init_per_testcase(ux_race_cond_case, Config) ->
50 |     ux:stop(),
51 |     ux:start(),
52 |     Config;
53 | init_per_testcase(_Case, Config) ->
54 |     Config.
55 | 
56 | end_per_testcase(_Case, _Config) ->
57 |     ok.
58 | 
59 | %% Configuration
60 | %% ----------------------------------------------------------------------
61 | 
62 | 
63 | 
64 | %% Tests
65 | %% ----------------------------------------------------------------------
66 | groups() ->
67 |     [{main_group, [], [
68 |                 ux_load_case,
69 |                 ux_race_cond_case
70 |     ]}].
71 | 
72 | all() ->
73 |     [{group, main_group}].
74 | 
75 | 
76 | 
77 | ux_load_case() ->
78 |     [{require, common_conf, ux_common_config}].
79 | 
80 | ux_race_cond_case() ->
81 |     [{require, common_conf, ux_common_config}].
82 | 
83 | 
84 | ux_load_case(_Cfg) ->  %% smoke test: both calls only need to return without crashing
85 |     ux_string:to_lower([1090,1077,1089,1090]), %% codepoints of "тест" ("test")
86 |     ux_string:to_nfkc([1090,1077,1089,1090]), %% same string as above
87 |     ok.
88 | 
89 | 
90 | %% Run the conversions in 20 concurrent workers and wait for all of them.
91 | ux_race_cond_case(_Cfg) ->
92 |     Work = fun() ->
93 |         ux_string:to_lower([1090,1077,1089,1090]), %% codepoints of "тест" ("test")
94 |         ux_string:to_nfkc([1090,1077,1089,1090])
95 |         end,
96 |     Refs = [element(2, spawn_monitor(Work)) || _ <- lists:seq(1, 20)],
97 |     %% A non-normal exit breaks the match below and so fails the test case.
98 |     [receive {'DOWN', Ref, process, _, Why} -> normal = Why end || Ref <- Refs].
99 | 


--------------------------------------------------------------------------------
/ct-run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | cd "$(dirname "$0")" || exit 1  # run from the script's own directory; abort if cd fails
3 | ct_run -spec ux_test.spec -pa "$PWD/ebin" edit "$PWD"/deps/*/ebin 
4 | #   -s reloader
5 | 
6 | 


--------------------------------------------------------------------------------
/doc/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | #The ux application#
 4 | 
 5 | 
 6 | ##Packages##
 7 | 
 8 | 
 9 | <table width="100%" border="0" summary="list of packages"><tr><td><a href="uca/package-summary.md" class="package">uca</a></td></tr><tr><td><a href="unidata/package-summary.md" class="package">unidata</a></td></tr><tr><td><a href="utils/package-summary.md" class="package">utils</a></td></tr></table>
10 | 
11 | 
12 | 
13 | ##Modules##
14 | 
15 | 
16 | <table width="100%" border="0" summary="list of modules">
17 | <tr><td><a href="ux_char.md" class="module">ux_char</a></td></tr>
18 | <tr><td><a href="ux_gb.md" class="module">ux_gb</a></td></tr>
19 | <tr><td><a href="ux_string.md" class="module">ux_string</a></td></tr>
20 | <tr><td><a href="ux_types.md" class="module">ux_types</a></td></tr>
21 | <tr><td><a href="ux_uca.md" class="module">ux_uca</a></td></tr>
22 | <tr><td><a href="ux_uca_options.md" class="module">ux_uca_options</a></td></tr>
23 | <tr><td><a href="ux_wb.md" class="module">ux_wb</a></td></tr></table>
24 | 
25 | 


--------------------------------------------------------------------------------
/doc/edoc-info:
--------------------------------------------------------------------------------
1 | {application,ux}.
2 | {packages,[uca,unidata,utils]}.
3 | {modules,[ux_char,ux_gb,ux_string,ux_types,ux_uca,ux_uca_options,ux_wb]}.
4 | 


--------------------------------------------------------------------------------
/doc/erlang.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/erlang-unicode/ux/0ccdba1f1acb1545e32693215d5b2a50879e217e/doc/erlang.png


--------------------------------------------------------------------------------
/doc/stylesheet.css:
--------------------------------------------------------------------------------
 1 | /* standard EDoc style sheet */
 2 | body {
 3 | 	font-family: Verdana, Arial, Helvetica, sans-serif;
 4 |       	margin-left: .25in;
 5 |        	margin-right: .2in;
 6 |        	margin-top: 0.2in;
 7 |        	margin-bottom: 0.2in;
 8 |        	color: #000000;
 9 |        	background-color: #ffffff;
10 | }
11 | h1,h2 {
12 |  	margin-left: -0.2in;
13 | }
14 | div.navbar {
15 | 	background-color: #add8e6;
16 | 	padding: 0.2em;
17 | }
18 | h2.indextitle {
19 | 	padding: 0.4em;
20 | 	background-color: #add8e6;
21 | }
22 | h3.function,h3.typedecl {
23 | 	background-color: #add8e6;
24 |  	padding-left: 1em;
25 | }
26 | div.spec {
27 |  	margin-left: 2em;
28 | 	background-color: #eeeeee;
29 | }
30 | a.module,a.package {
31 | 	text-decoration:none
32 | }
33 | a.module:hover,a.package:hover {
34 | 	background-color: #eeeeee;
35 | }
36 | ul.definitions {
37 | 	list-style-type: none;
38 | }
39 | ul.index {
40 | 	list-style-type: none;
41 | 	background-color: #eeeeee;
42 | }
43 | 
44 | /*
45 |  * Minor style tweaks
46 |  */
47 | ul {
48 | 	list-style-type: square;
49 | }
50 | table {
51 | 	border-collapse: collapse;
52 | }
53 | td {
54 | 	padding: 3px; /* non-zero lengths need a unit, otherwise the declaration is dropped */
55 | }
56 | 


--------------------------------------------------------------------------------
/doc/uca/package-summary.md:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | #Package uca#
4 | 


--------------------------------------------------------------------------------
/doc/uca/ux_uca_compress.md:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | #Module ux_uca_compress#
 4 | * [Function Index](#index)
 5 | * [Function Details](#functions)
 6 | 
 7 | 
 8 | 
 9 | 
10 | <a name="index"></a>
11 | 
12 | ##Function Index##
13 | 
14 | 
15 | <table width="100%" border="1" cellspacing="0" cellpadding="2" summary="function index"><tr><td valign="top"><a href="#reassign_fun-3">reassign_fun/3</a></td><td></td></tr></table>
16 | 
17 | 
18 | <a name="functions"></a>
19 | 
20 | ##Function Details##
21 | 
22 | <a name="reassign_fun-3"></a>
23 | 
24 | ###reassign_fun/3##
25 | 
26 | 
27 | 
28 | 
29 | `reassign_fun(Lvl, Min, Max) -> any()`
30 | 
31 | 


--------------------------------------------------------------------------------
/doc/uca/ux_uca_decomp.md:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | #Module ux_uca_decomp#
 4 | * [Function Index](#index)
 5 | * [Function Details](#functions)
 6 | 
 7 | 
 8 | 
 9 | 
10 | <a name="index"></a>
11 | 
12 | ##Function Index##
13 | 
14 | 
15 | <table width="100%" border="1" cellspacing="0" cellpadding="2" summary="function index"><tr><td valign="top"><a href="#compute-2">compute/2</a></td><td></td></tr></table>
16 | 
17 | 
18 | <a name="functions"></a>
19 | 
20 | ##Function Details##
21 | 
22 | <a name="compute-2"></a>
23 | 
24 | ###compute/2##
25 | 
26 | 
27 | 
28 | 
29 | `compute(Char, List) -> any()`
30 | 
31 | 


--------------------------------------------------------------------------------
/doc/uca/ux_uca_extract.md:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | #Module ux_uca_extract#
4 | 
5 | 
6 | 
7 | 
8 | 


--------------------------------------------------------------------------------
/doc/uca/ux_uca_options.md:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | #Module ux_uca_options#
 4 | * [Description](#description)
 5 | * [Function Index](#index)
 6 | * [Function Details](#functions)
 7 | 
 8 | 
 9 | This library contains functions for manipulating
10 | the sorting configuration.
11 | 
12 | <a name="description"></a>
13 | 
14 | ##Description##
15 |         You can use it as:
16 | `C = ux_uca_options:get_options(shifted).`
17 | And then:
18 | `ux_uca:sort(C, ["string1", "string2", "string3"]).`<a name="index"></a>
19 | 
20 | ##Function Index##
21 | 
22 | 
23 | <table width="100%" border="1" cellspacing="0" cellpadding="2" summary="function index"><tr><td valign="top"><a href="#get_options-0">get_options/0</a></td><td></td></tr><tr><td valign="top"><a href="#get_options-1">get_options/1</a></td><td></td></tr><tr><td valign="top"><a href="#get_options-2">get_options/2</a></td><td>If you want use this library without import *.hrl, you can create
24 | a #uca_options {} record with this function.</td></tr></table>
25 | 
26 | 
27 | <a name="functions"></a>
28 | 
29 | ##Function Details##
30 | 
31 | <a name="get_options-0"></a>
32 | 
33 | ###get_options/0##
34 | 
35 | 
36 | 
37 | 
38 | `get_options() -> any()`
39 | 
40 | <a name="get_options-1"></a>
41 | 
42 | ###get_options/1##
43 | 
44 | 
45 | 
46 | 
47 | `get_options(Params) -> any()`
48 | 
49 | <a name="get_options-2"></a>
50 | 
51 | ###get_options/2##
52 | 
53 | 
54 | 
55 | 
56 | `get_options(C, T) -> any()`
57 | 
58 | 
59 | 
60 | If you want use this library without import *.hrl, you can create
61 | a #uca_options {} record with this function.


--------------------------------------------------------------------------------
/doc/uca/ux_uca_sort_key_binary.md:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | #Module ux_uca_sort_key_binary#
 4 | * [Function Index](#index)
 5 | * [Function Details](#functions)
 6 | 
 7 | 
 8 | 
 9 | 
10 | <a name="index"></a>
11 | 
12 | ##Function Index##
13 | 
14 | 
15 | <table width="100%" border="1" cellspacing="0" cellpadding="2" summary="function index"><tr><td valign="top"><a href="#sort_key-2">sort_key/2</a></td><td></td></tr></table>
16 | 
17 | 
18 | <a name="functions"></a>
19 | 
20 | ##Function Details##
21 | 
22 | <a name="sort_key-2"></a>
23 | 
24 | ###sort_key/2##
25 | 
26 | 
27 | 
28 | 
29 | `sort_key(C, S) -> any()`
30 | 
31 | 


--------------------------------------------------------------------------------
/doc/uca/ux_uca_sort_key_binary_cs.md:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | #Module ux_uca_sort_key_binary_cs#
 4 | * [Function Index](#index)
 5 | * [Function Details](#functions)
 6 | 
 7 | 
 8 | 
 9 | 
10 | <a name="index"></a>
11 | 
12 | ##Function Index##
13 | 
14 | 
15 | <table width="100%" border="1" cellspacing="0" cellpadding="2" summary="function index"><tr><td valign="top"><a href="#sort_key-2">sort_key/2</a></td><td></td></tr></table>
16 | 
17 | 
18 | <a name="functions"></a>
19 | 
20 | ##Function Details##
21 | 
22 | <a name="sort_key-2"></a>
23 | 
24 | ###sort_key/2##
25 | 
26 | 
27 | 
28 | 
29 | `sort_key(C, S) -> any()`
30 | 
31 | 


--------------------------------------------------------------------------------
/doc/uca/ux_uca_sort_key_list.md:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | #Module ux_uca_sort_key_list#
 4 | * [Function Index](#index)
 5 | * [Function Details](#functions)
 6 | 
 7 | 
 8 | 
 9 | 
10 | <a name="index"></a>
11 | 
12 | ##Function Index##
13 | 
14 | 
15 | <table width="100%" border="1" cellspacing="0" cellpadding="2" summary="function index"><tr><td valign="top"><a href="#sort_key-2">sort_key/2</a></td><td></td></tr></table>
16 | 
17 | 
18 | <a name="functions"></a>
19 | 
20 | ##Function Details##
21 | 
22 | <a name="sort_key-2"></a>
23 | 
24 | ###sort_key/2##
25 | 
26 | 
27 | 
28 | 
29 | `sort_key(C, S) -> any()`
30 | 
31 | 


--------------------------------------------------------------------------------
/doc/uca/ux_uca_sort_key_uncompressed.md:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | #Module ux_uca_sort_key_uncompressed#
 4 | * [Function Index](#index)
 5 | * [Function Details](#functions)
 6 | 
 7 | 
 8 | 
 9 | 
10 | <a name="index"></a>
11 | 
12 | ##Function Index##
13 | 
14 | 
15 | <table width="100%" border="1" cellspacing="0" cellpadding="2" summary="function index"><tr><td valign="top"><a href="#sort_key-2">sort_key/2</a></td><td></td></tr></table>
16 | 
17 | 
18 | <a name="functions"></a>
19 | 
20 | ##Function Details##
21 | 
22 | <a name="sort_key-2"></a>
23 | 
24 | ###sort_key/2##
25 | 
26 | 
27 | 
28 | 
29 | `sort_key(C, S) -> any()`
30 | 
31 | 


--------------------------------------------------------------------------------
/doc/uca/ux_uca_testdata.md:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | #Module ux_uca_testdata#
 4 | * [Function Index](#index)
 5 | * [Function Details](#functions)
 6 | 
 7 | 
 8 | 
 9 | 
10 | <a name="index"></a>
11 | 
12 | ##Function Index##
13 | 
14 | 
15 | <table width="100%" border="1" cellspacing="0" cellpadding="2" summary="function index"><tr><td valign="top"><a href="#read_line-2">read_line/2</a></td><td>Read line from a testdata file Fd (see CollationTest.html).</td></tr><tr><td valign="top"><a href="#read_shifted-0">read_shifted/0</a></td><td></td></tr></table>
16 | 
17 | 
18 | <a name="functions"></a>
19 | 
20 | ##Function Details##
21 | 
22 | <a name="read_line-2"></a>
23 | 
24 | ###read_line/2##
25 | 
26 | 
27 | 
28 | 
29 | `read_line(Fd, StrNum) -> any()`
30 | 
31 | 
32 | 
33 | Read line from a testdata file Fd (see CollationTest.html).
34 | Returns {Plain string, List of Codepoints, StrNum} or eof
35 | Used by test/4.<a name="read_shifted-0"></a>
36 | 
37 | ###read_shifted/0##
38 | 
39 | 
40 | 
41 | 
42 | `read_shifted() -> any()`
43 | 
44 | 


--------------------------------------------------------------------------------
/doc/uca/ux_uca_utils.md:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | #Module ux_uca_utils#
  4 | * [Function Index](#index)
  5 | * [Function Details](#functions)
  6 | 
  7 | 
  8 | 
  9 | 
 10 | <a name="index"></a>
 11 | 
 12 | ##Function Index##
 13 | 
 14 | 
 15 | <table width="100%" border="1" cellspacing="0" cellpadding="2" summary="function index"><tr><td valign="top"><a href="#do_alt-2">do_alt/2</a></td><td></td></tr><tr><td valign="top"><a href="#do_alt-3">do_alt/3</a></td><td></td></tr><tr><td valign="top"><a href="#do_extract-3">do_extract/3</a></td><td></td></tr><tr><td valign="top"><a href="#get_ducet-0">get_ducet/0</a></td><td></td></tr><tr><td valign="top"><a href="#get_options-0">get_options/0</a></td><td></td></tr><tr><td valign="top"><a href="#get_reassign_function-2">get_reassign_function/2</a></td><td></td></tr><tr><td valign="top"><a href="#hangul_type-1">hangul_type/1</a></td><td></td></tr><tr><td valign="top"><a href="#implicit_type-1">implicit_type/1</a></td><td></td></tr><tr><td valign="top"><a href="#split_levels-3">split_levels/3</a></td><td></td></tr></table>
 16 | 
 17 | 
 18 | <a name="functions"></a>
 19 | 
 20 | ##Function Details##
 21 | 
 22 | <a name="do_alt-2"></a>
 23 | 
 24 | ###do_alt/2##
 25 | 
 26 | 
 27 | 
 28 | 
 29 | <pre>do_alt(A::function(), W::binary() | integer()) -&gt; [integer()]</pre>
 30 | <br></br>
 31 | 
 32 | 
 33 | <a name="do_alt-3"></a>
 34 | 
 35 | ###do_alt/3##
 36 | 
 37 | 
 38 | 
 39 | 
 40 | `do_alt(A, W, S) -> any()`
 41 | 
 42 | <a name="do_extract-3"></a>
 43 | 
 44 | ###do_extract/3##
 45 | 
 46 | 
 47 | 
 48 | 
 49 | <pre>do_extract(Uca_options::#uca_options{}, S::string(), D::function()) -&gt; {integer(), string()}</pre>
 50 | <br></br>
 51 | 
 52 | 
 53 | <a name="get_ducet-0"></a>
 54 | 
 55 | ###get_ducet/0##
 56 | 
 57 | 
 58 | 
 59 | 
 60 | <pre>get_ducet() -&gt; function()</pre>
 61 | <br></br>
 62 | 
 63 | 
 64 | <a name="get_options-0"></a>
 65 | 
 66 | ###get_options/0##
 67 | 
 68 | 
 69 | 
 70 | 
 71 | <pre>get_options() -&gt; #uca_options{}</pre>
 72 | <br></br>
 73 | 
 74 | 
 75 | <a name="get_reassign_function-2"></a>
 76 | 
 77 | ###get_reassign_function/2##
 78 | 
 79 | 
 80 | 
 81 | 
 82 | `get_reassign_function(D, L) -> any()`
 83 | 
 84 | <a name="hangul_type-1"></a>
 85 | 
 86 | ###hangul_type/1##
 87 | 
 88 | 
 89 | 
 90 | 
 91 | `hangul_type(X) -> any()`
 92 | 
 93 | <a name="implicit_type-1"></a>
 94 | 
 95 | ###implicit_type/1##
 96 | 
 97 | 
 98 | 
 99 | 
100 | `implicit_type(X) -> any()`
101 | 
102 | <a name="split_levels-3"></a>
103 | 
104 | ###split_levels/3##
105 | 
106 | 
107 | 
108 | 
109 | <pre>split_levels(L::integer(), B::boolean(), W::[[integer()]]) -&gt; {[integer()], [[integer()]]}</pre>
110 | <br></br>
111 | 
112 | 
113 | 


--------------------------------------------------------------------------------
/doc/unidata/package-summary.md:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | #Package unidata#
4 | 


--------------------------------------------------------------------------------
/doc/utils/package-summary.md:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | #Package utils#
4 | 


--------------------------------------------------------------------------------
/doc/utils/ux_opt_ranges.md:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | #Module ux_opt_ranges#
 4 | * [Description](#description)
 5 | * [Function Index](#index)
 6 | * [Function Details](#functions)
 7 | 
 8 | 
 9 | Functions for working with ranges in lists.
10 | 
11 | <a name="description"></a>
12 | 
13 | ##Description##
14 | 
15 | 
16 | 
17 | 
18 | ETS is fast only as a key-value store.       
19 | But some data files contain ranges: From..To.       
20 | The fastest way is to use lists for storing these values.
21 | 
22 | 
23 | 
24 | There are two types of these lists:
25 | * with booleans: `[{1,3}, 6, {8,9}]`. For example, `is_compat`;
26 | * with values: `[{{1,3}, value1}, {{4,12}, value2}]`.
27 | 
28 | `in_list` function is for the first type.
29 | `search` function is for the second type.
30 | <a name="index"></a>
31 | 
32 | ##Function Index##
33 | 
34 | 
35 | <table width="100%" border="1" cellspacing="0" cellpadding="2" summary="function index"><tr><td valign="top"><a href="#in_list-1">in_list/1</a></td><td></td></tr><tr><td valign="top"><a href="#search-2">search/2</a></td><td></td></tr></table>
36 | 
37 | 
38 | <a name="functions"></a>
39 | 
40 | ##Function Details##
41 | 
42 | <a name="in_list-1"></a>
43 | 
44 | ###in_list/1##
45 | 
46 | 
47 | 
48 | 
49 | `in_list(V) -> any()`
50 | 
51 | <a name="search-2"></a>
52 | 
53 | ###search/2##
54 | 
55 | 
56 | 
57 | 
58 | `search(Def, V) -> any()`
59 | 
60 | 


--------------------------------------------------------------------------------
/doc/utils/ux_ranges.md:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | #Module ux_ranges#
 4 | * [Description](#description)
 5 | * [Function Index](#index)
 6 | * [Function Details](#functions)
 7 | 
 8 | 
 9 | Functions for working with ranges in lists.
10 | 
11 | <a name="description"></a>
12 | 
13 | ##Description##
14 | 
15 | 
16 | 
17 | 
18 | ETS is fast only as a key-value store.       
19 | But some data files contain ranges: From..To.       
20 | The fastest way is to use lists for storing these values.
21 | 
22 | 
23 | 
24 | There are two types of these lists:
25 | * with booleans: `[{1,3}, 6, {8,9}]`. For example, `is_compat`;
26 | * with values: `[{{1,3}, value1}, {{4,12}, value2}]`.
27 | 
28 | `in_list` function is for the first type.
29 | `search` function is for the second type.
30 | <a name="index"></a>
31 | 
32 | ##Function Index##
33 | 
34 | 
35 | <table width="100%" border="1" cellspacing="0" cellpadding="2" summary="function index"><tr><td valign="top"><a href="#in_list-2">in_list/2</a></td><td></td></tr><tr><td valign="top"><a href="#search-2">search/2</a></td><td></td></tr></table>
36 | 
37 | 
38 | <a name="functions"></a>
39 | 
40 | ##Function Details##
41 | 
42 | <a name="in_list-2"></a>
43 | 
44 | ###in_list/2##
45 | 
46 | 
47 | 
48 | 
49 | <pre>in_list(T::[{integer(), integer()} | integer()], H::integer()) -&gt; boolean()</pre>
50 | <br></br>
51 | 
52 | 
53 | <a name="search-2"></a>
54 | 
55 | ###search/2##
56 | 
57 | 
58 | 
59 | 
60 | <pre>search(T::[{{integer(), integer()} | integer(), term()}], H::integer()) -&gt; boolean()</pre>
61 | <br></br>
62 | 
63 | 
64 | 


--------------------------------------------------------------------------------
/doc/ux_char.md:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | #Module ux_char#
  4 | * [Description](#description)
  5 | * [Data Types](#types)
  6 | * [Function Index](#index)
  7 | * [Function Details](#functions)
  8 | 
  9 | 
 10 | Character functions.
 11 | 
 12 | 
 13 | 
 14 | Copyright (c) 2010-2011 Michael Uvarov
 15 | 
 16 | __Authors:__ Michael Uvarov ([`arcusfelis@gmail.com`](mailto:arcusfelis@gmail.com)).
 17 | <a name="types"></a>
 18 | 
 19 | ##Data Types##
 20 | 
 21 | 
 22 | 
 23 | 
 24 | ###<a name="type-char_type">char_type()</a>##
 25 | 
 26 | 
 27 | 
 28 | <pre>char_type() = <a href="ux_types.md#type-char_type">ux_types:char_type()</a></pre>
 29 | <a name="index"></a>
 30 | 
 31 | ##Function Index##
 32 | 
 33 | 
 34 | <table width="100%" border="1" cellspacing="0" cellpadding="2" summary="function index"><tr><td valign="top"><a href="#block-1">block/1</a></td><td></td></tr><tr><td valign="top"><a href="#comment-1">comment/1</a></td><td></td></tr><tr><td valign="top"><a href="#is_acsii-1">is_acsii/1</a></td><td></td></tr><tr><td valign="top"><a href="#is_cjk_compatibility_ideograph-1">is_cjk_compatibility_ideograph/1</a></td><td></td></tr><tr><td valign="top"><a href="#is_cjk_unified_ideograph-1">is_cjk_unified_ideograph/1</a></td><td></td></tr><tr><td valign="top"><a href="#is_decimal-1">is_decimal/1</a></td><td>Return true, if C is a decimal number.</td></tr><tr><td valign="top"><a href="#is_hangul-1">is_hangul/1</a></td><td></td></tr><tr><td valign="top"><a href="#is_hangul_precomposed-1">is_hangul_precomposed/1</a></td><td></td></tr><tr><td valign="top"><a href="#is_letter-1">is_letter/1</a></td><td>Returns true, if C is a letter.</td></tr><tr><td valign="top"><a href="#is_lower-1">is_lower/1</a></td><td></td></tr><tr><td valign="top"><a href="#is_mark-1">is_mark/1</a></td><td></td></tr><tr><td valign="top"><a href="#is_number-1">is_number/1</a></td><td>Returns true, if C is a number.</td></tr><tr><td valign="top"><a href="#is_punctuation_mark-1">is_punctuation_mark/1</a></td><td>Returns true, if C is a punctuation mark.</td></tr><tr><td valign="top"><a href="#is_separator-1">is_separator/1</a></td><td>Return true, if C is a separator.</td></tr><tr><td valign="top"><a href="#is_unified_ideograph-1">is_unified_ideograph/1</a></td><td></td></tr><tr><td valign="top"><a href="#is_upper-1">is_upper/1</a></td><td></td></tr><tr><td valign="top"><a href="#script-1">script/1</a></td><td></td></tr><tr><td valign="top"><a href="#to_lower-1">to_lower/1</a></td><td></td></tr><tr><td valign="top"><a href="#to_ncr-1">to_ncr/1</a></td><td></td></tr><tr><td valign="top"><a href="#to_upper-1">to_upper/1</a></td><td></td></tr><tr><td valign="top"><a 
href="#type-1">type/1</a></td><td></td></tr></table>
 35 | 
 36 | 
 37 | <a name="functions"></a>
 38 | 
 39 | ##Function Details##
 40 | 
 41 | <a name="block-1"></a>
 42 | 
 43 | ###block/1##
 44 | 
 45 | 
 46 | 
 47 | 
 48 | <pre>block(V::char) -&gt; atom()</pre>
 49 | <br></br>
 50 | 
 51 | 
 52 | <a name="comment-1"></a>
 53 | 
 54 | ###comment/1##
 55 | 
 56 | 
 57 | 
 58 | 
 59 | <pre>comment(V::char()) -&gt; binary()</pre>
 60 | <br></br>
 61 | 
 62 | 
 63 | <a name="is_acsii-1"></a>
 64 | 
 65 | ###is_acsii/1##
 66 | 
 67 | 
 68 | 
 69 | 
 70 | <pre>is_acsii(Char::char()) -&gt; boolean()</pre>
 71 | <br></br>
 72 | 
 73 | 
 74 | <a name="is_cjk_compatibility_ideograph-1"></a>
 75 | 
 76 | ###is_cjk_compatibility_ideograph/1##
 77 | 
 78 | 
 79 | 
 80 | 
 81 | `is_cjk_compatibility_ideograph(Ch) -> any()`
 82 | 
 83 | <a name="is_cjk_unified_ideograph-1"></a>
 84 | 
 85 | ###is_cjk_unified_ideograph/1##
 86 | 
 87 | 
 88 | 
 89 | 
 90 | `is_cjk_unified_ideograph(Ch) -> any()`
 91 | 
 92 | <a name="is_decimal-1"></a>
 93 | 
 94 | ###is_decimal/1##
 95 | 
 96 | 
 97 | 
 98 | 
 99 | <pre>is_decimal(C::char()) -&gt; boolean()</pre>
100 | <br></br>
101 | 
102 | 
103 | 
104 | 
105 | Return true, if C is a decimal number.<a name="is_hangul-1"></a>
106 | 
107 | ###is_hangul/1##
108 | 
109 | 
110 | 
111 | 
112 | `is_hangul(Char) -> any()`
113 | 
114 | <a name="is_hangul_precomposed-1"></a>
115 | 
116 | ###is_hangul_precomposed/1##
117 | 
118 | 
119 | 
120 | 
121 | `is_hangul_precomposed(Char) -> any()`
122 | 
123 | <a name="is_letter-1"></a>
124 | 
125 | ###is_letter/1##
126 | 
127 | 
128 | 
129 | 
130 | <pre>is_letter(C::char()) -&gt; boolean()</pre>
131 | <br></br>
132 | 
133 | 
134 | 
135 | 
136 | Returns true, if C is a letter.<a name="is_lower-1"></a>
137 | 
138 | ###is_lower/1##
139 | 
140 | 
141 | 
142 | 
143 | <pre>is_lower(V::char()) -&gt; boolean()</pre>
144 | <br></br>
145 | 
146 | 
147 | <a name="is_mark-1"></a>
148 | 
149 | ###is_mark/1##
150 | 
151 | 
152 | 
153 | 
154 | `is_mark(C) -> any()`
155 | 
156 | <a name="is_number-1"></a>
157 | 
158 | ###is_number/1##
159 | 
160 | 
161 | 
162 | 
163 | <pre>is_number(C::char()) -&gt; boolean()</pre>
164 | <br></br>
165 | 
166 | 
167 | 
168 | 
169 | Returns true, if C is a number.<a name="is_punctuation_mark-1"></a>
170 | 
171 | ###is_punctuation_mark/1##
172 | 
173 | 
174 | 
175 | 
176 | <pre>is_punctuation_mark(C::char()) -&gt; boolean()</pre>
177 | <br></br>
178 | 
179 | 
180 | 
181 | 
182 | Returns true, if C is a punctuation mark.<a name="is_separator-1"></a>
183 | 
184 | ###is_separator/1##
185 | 
186 | 
187 | 
188 | 
189 | <pre>is_separator(C::char()) -&gt; boolean()</pre>
190 | <br></br>
191 | 
192 | 
193 | 
194 | 
195 | Return true, if C is a separator.<a name="is_unified_ideograph-1"></a>
196 | 
197 | ###is_unified_ideograph/1##
198 | 
199 | 
200 | 
201 | 
202 | `is_unified_ideograph(Ch) -> any()`
203 | 
204 | <a name="is_upper-1"></a>
205 | 
206 | ###is_upper/1##
207 | 
208 | 
209 | 
210 | 
211 | <pre>is_upper(V::char()) -&gt; boolean()</pre>
212 | <br></br>
213 | 
214 | 
215 | <a name="script-1"></a>
216 | 
217 | ###script/1##
218 | 
219 | 
220 | 
221 | 
222 | <pre>script(V::char) -&gt; atom()</pre>
223 | <br></br>
224 | 
225 | 
226 | <a name="to_lower-1"></a>
227 | 
228 | ###to_lower/1##
229 | 
230 | 
231 | 
232 | 
233 | <pre>to_lower(V::char()) -&gt; char()</pre>
234 | <br></br>
235 | 
236 | 
237 | <a name="to_ncr-1"></a>
238 | 
239 | ###to_ncr/1##
240 | 
241 | 
242 | 
243 | 
244 | <pre>to_ncr(Char::char()) -&gt; string()</pre>
245 | <br></br>
246 | 
247 | 
248 | <a name="to_upper-1"></a>
249 | 
250 | ###to_upper/1##
251 | 
252 | 
253 | 
254 | 
255 | <pre>to_upper(V::char()) -&gt; char()</pre>
256 | <br></br>
257 | 
258 | 
259 | <a name="type-1"></a>
260 | 
261 | ###type/1##
262 | 
263 | 
264 | 
265 | 
266 | <pre>type(V::char()) -> <a href="#type-char_type">char_type()</a></pre>
267 | <br></br>
268 | 
269 | 
270 | 


--------------------------------------------------------------------------------
/doc/ux_gb.md:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | #Module ux_gb#
 4 | * [Description](#description)
 5 | * [Function Index](#index)
 6 | * [Function Details](#functions)
 7 | 
 8 | 
 9 | Default Grapheme Cluster Boundary Breaker.
10 | 
11 | <a name="description"></a>
12 | 
13 | ##Description##
14 | 
15 | 
16 | [UTR29: Grapheme Cluster Boundaries](http://unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries)
17 | <a name="index"></a>
18 | 
19 | ##Function Index##
20 | 
21 | 
22 | <table width="100%" border="1" cellspacing="0" cellpadding="2" summary="function index"><tr><td valign="top"><a href="#split-2">split/2</a></td><td></td></tr></table>
23 | 
24 | 
25 | <a name="functions"></a>
26 | 
27 | ##Function Details##
28 | 
29 | <a name="split-2"></a>
30 | 
31 | ###split/2##
32 | 
33 | 
34 | 
35 | 
36 | `split(T, S) -> any()`
37 | 
38 | 


--------------------------------------------------------------------------------
/doc/ux_types.md:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | #Module ux_types#
 4 | * [Data Types](#types)
 5 | 
 6 | 
 7 | 
 8 | 
 9 | 
10 | <a name="types"></a>
11 | 
12 | ##Data Types##
13 | 
14 | 
15 | 
16 | 
17 | ###<a name="type-char_type">char_type()</a>##
18 | 
19 | 
20 | 
21 | <pre>char_type() = lu | ll | lt | mn | mc | me | nd | nl | no | zs | zl | zp | cc | cf | cs | co | cn | lm | lo | pc | pd | ps | pe | pi | pf | po | sm | sc | sk | so | other</pre>
22 | 
23 | 
24 | 
25 | ###<a name="type-ux_ccc">ux_ccc()</a>##
26 | 
27 | 
28 | 
29 | <pre>ux_ccc() = 0..240</pre>
30 | 


--------------------------------------------------------------------------------
/doc/ux_uca.md:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | #Module ux_uca#
  4 | * [Description](#description)
  5 | * [Data Types](#types)
  6 | * [Function Index](#index)
  7 | * [Function Details](#functions)
  8 | 
  9 | 
 10 | UNICODE COLLATION ALGORITHM        
 11 | see Unicode Technical Standard #10.
 12 | 
 13 | 
 14 | 
 15 | Copyright (c) 2010-2011 Michael Uvarov
 16 | 
 17 | __Authors:__ Michael Uvarov ([`arcusfelis@gmail.com`](mailto:arcusfelis@gmail.com)).<a name="description"></a>
 18 | 
 19 | ##Description##
 20 | 
 21 | 
 22 | 
 23 | 
 24 | ###<a name="Additional_information_(and_links)">Additional information (and links)</a>##
 25 | 
 26 | 
 27 | 
 28 | 
 29 | 1. [Hangul Collation Requirements](http://www.open-std.org/jtc1/sc22/wg20/docs/n1037-Hangul%20Collation%20Requirements.htm)   
 30 | PS: This is the main source of information.
 31 | 
 32 | 
 33 | 
 34 | 2. [Terminator weight for Hangul](http://code.activestate.com/lists/perl-unicode/2163/)
 35 | 
 36 | 
 37 | 
 38 | 3. [Theory vs. practice for Korean text collation](http://blogs.msdn.com/b/michkap/archive/2005/02/25/380266.aspx)   
 39 | PS: there is not any practice. They do not use the UCA :/
 40 | 
 41 | 
 42 | 
 43 | 4. [Wiki](http://en.wikipedia.org/wiki/Unicode_collation_algorithm)
 44 | 
 45 | 
 46 | 
 47 | 6. [Unicode implementer's guide part 3: Conjoining jamo behavior](http://useless-factor.blogspot.com/2007/08/unicode-implementers-guide-part-3.html)
 48 | 
 49 | 
 50 | 
 51 | 7. [Unicode implementer's guide part 5: Collation](http://useless-factor.blogspot.com/2007/10/unicode-implementers-guide-part-5.html)
 52 | 
 53 | 
 54 | 
 55 | 8. [Unicode collation works now](http://useless-factor.blogspot.com/2008/05/unicode-collation-works-now.html)   
 56 | PS: I found it so late. :(
 57 | 
 58 | 
 59 | 
 60 | 9. [ICU](http://userguide.icu-project.org/collation/concepts)
 61 | 
 62 | 
 63 | 
 64 | 10. [String Sorting (Natural) in Erlang Cookbook](http://trapexit.org/String_Sorting_%28Natural%29)
 65 | 
 66 | 
 67 | 
 68 | 
 69 | For hangul collation:
 70 | 11. [Hangul Collation Requirements](http://www.open-std.org/Jtc1/sc22/wg20/docs/n1037-Hangul%20Collation%20Requirements.htm)
 71 | 12. [UTR 10](http://www.unicode.org/reports/tr10/#Hangul_Collation)
 72 | 13. [KSX1001 on Wiki](http://en.wikipedia.org/wiki/KSX1001)
 73 | 
 74 | 
 75 | 
 76 | 
 77 | ###<a name="Levels">Levels</a>##
 78 | 
 79 | 
 80 |    
 81 | http://unicode.org/reports/tr10/#Multi_Level_Comparison
 82 | 
 83 | 
 84 | 
 85 | * L1 Base characters   
 86 | * L2 Accents   
 87 | * L3 Case   
 88 | * L4 Punctuation
 89 | 
 90 | Example using levels:
 91 | <pre>   C = ux_uca_options:get_options([{strength, 3}]).
 92 |    ux_uca:sort_key(C, "Get L1-L3 weights").</pre>
 93 | 
 94 | 
 95 | 
 96 | 
 97 | ###<a name="Common_configurations">Common configurations</a>##
 98 | 
 99 | 
100 | 
101 | 
102 | ####<a name="Non-ignorable">Non-ignorable</a>##
103 | 
104 | 
105 |    
106 | Variable collation elements are not reset to be ignorable, but   
107 | get the weights explicitly mentioned in the file.
108 | 
109 | 
110 | 
111 | * SPACE would have the value [.0209.0020.0002]   
112 | * Capital A would be unchanged, with the value [.06D9.0020.0008]   
113 | * Ignorables are unchanged.
114 | 
115 | Example:
116 | <pre>   C = ux_uca_options:get_options(non_ignorable).
117 |    ux_uca:sort_key(C, "Non-ignorable collation sort key").</pre>
118 | 
119 | 
120 | 
121 | 
122 | ####<a name="Blanked">Blanked</a>##
123 | 
124 | 
125 |    
126 | Variable collation elements and any subsequent ignorables   
127 | are reset so that their weights at levels one through three are zero.   
128 | For example,
129 | 
130 | 
131 | 
132 | * SPACE would have the value [.0000.0000.0000]   
133 | * A combining grave accent after a space would have the value     
134 | [.0000.0000.0000]   
135 | * Capital A would be unchanged, with the value [.06D9.0020.0008]   
136 | * A combining grave accent after a Capital A would be unchanged
137 | 
138 | Example:
139 | <pre>   C = ux_uca_options:get_options(blanked).
140 |    ux_uca:sort_key(C, "Blanked collation sort key").</pre>
141 | 
142 | 
143 | 
144 | 
145 | ####<a name="Shifted">Shifted</a>##
146 | 
147 | 
148 |    
149 | Variable collation elements are reset to zero at levels one through   
150 | three. In addition, a new fourth-level weight is appended, whose value   
151 | depends on the type, as shown in Table 12.   
152 | Any subsequent primary or secondary ignorables following a variable are reset   
153 | so that their weights at levels one through four are zero.
154 | 
155 | 
156 | 
157 | * A combining grave accent after a space would have the value     
158 | [.0000.0000.0000.0000].   
159 | * A combining grave accent after a Capital A would be unchanged.
160 | 
161 | Example:
162 | <pre>   C = ux_uca_options:get_options(shifted).
163 |    ux_uca:sort_key(C, "Shifted collation sort key").</pre>
164 | 
165 | 
166 | 
167 | 
168 | ####<a name="Shift-trimmed">Shift-trimmed</a>##
169 | 
170 | 
171 |    
172 | This option is the same as Shifted, except that all trailing   
173 | FFFFs are trimmed from the sort key.   
174 | This could be used to emulate POSIX behavior.
175 | 
176 | Example:
177 | <pre>   C = ux_uca_options:get_options(shift_trimmed).
178 |    ux_uca:sort_key(C, "Shift-trimmed collation sort key").</pre>
179 | 
180 | 
181 | <a name="types"></a>
182 | 
183 | ##Data Types##
184 | 
185 | 
186 | 
187 | 
188 | ###<a name="type-result">result()</a>##
189 | 
190 | 
191 | 
192 | <pre>result() = {[<a href="#type-uca_elem">uca_elem()</a>], string()}</pre>
193 | 
194 | 
195 | 
196 | ###<a name="type-search_result">search_result()</a>##
197 | 
198 | 
199 | 
200 | <pre>search_result() = {string(), string(), string()}</pre>
201 | 
202 | 
203 | 
204 | ###<a name="type-uca_alternate">uca_alternate()</a>##
205 | 
206 | 
207 | 
208 | <pre>uca_alternate() = shifted | shift_trimmed | non_ignorable | blanked</pre>
209 | 
210 | 
211 | 
212 | ###<a name="type-uca_array">uca_array()</a>##
213 | 
214 | 
215 | 
216 | <pre>uca_array() = [<a href="#type-uca_elem">uca_elem()</a>]</pre>
217 | 
218 | 
219 | 
220 | ###<a name="type-uca_case_first">uca_case_first()</a>##
221 | 
222 | 
223 | 
224 | <pre>uca_case_first() = lower | upper | off</pre>
225 | 
226 | 
227 | 
228 | ###<a name="type-uca_compare_result">uca_compare_result()</a>##
229 | 
230 | 
231 | 
232 | <pre>uca_compare_result() = lower | greater | equal</pre>
233 | 
234 | 
235 | 
236 | ###<a name="type-uca_elem">uca_elem()</a>##
237 | 
238 | 
239 | 
240 | <pre>uca_elem() = [atom() | <a href="#type-uca_weight">uca_weight()</a>]</pre>
241 | 
242 | 
243 | 
244 | ###<a name="type-uca_sort_key_format">uca_sort_key_format()</a>##
245 | 
246 | 
247 | 
248 | <pre>uca_sort_key_format() = binary | list | uncompressed</pre>
249 | 
250 | 
251 | 
252 | ###<a name="type-uca_strength">uca_strength()</a>##
253 | 
254 | 
255 | 
256 | <pre>uca_strength() = 1 | 2 | 3 | 4</pre>
257 | 
258 | 
259 | 
260 | ###<a name="type-uca_weight">uca_weight()</a>##
261 | 
262 | 
263 | 
264 | <pre>uca_weight() = integer()</pre>
265 | 
266 | 
267 | 
268 | ###<a name="type-uca_weights">uca_weights()</a>##
269 | 
270 | 
271 | 
272 | <pre>uca_weights() = [<a href="#type-uca_weight">uca_weight()</a>]</pre>
273 | <a name="index"></a>
274 | 
275 | ##Function Index##
276 | 
277 | 
278 | <table width="100%" border="1" cellspacing="0" cellpadding="2" summary="function index"><tr><td valign="top"><a href="#compare-2">compare/2</a></td><td>Compare two strings and return: lower, greater or equal.</td></tr><tr><td valign="top"><a href="#compare-3">compare/3</a></td><td></td></tr><tr><td valign="top"><a href="#search-2">search/2</a></td><td></td></tr><tr><td valign="top"><a href="#search-3">search/3</a></td><td></td></tr><tr><td valign="top"><a href="#search-4">search/4</a></td><td></td></tr><tr><td valign="top"><a href="#sort-1">sort/1</a></td><td>Sort a list of strings.</td></tr><tr><td valign="top"><a href="#sort-2">sort/2</a></td><td>Sort a list of strings.</td></tr><tr><td valign="top"><a href="#sort_array-1">sort_array/1</a></td><td>Convert the unicode string to the
279 | <a href="http://unicode.org/reports/tr10/#Step_2" target="_top">collation element array</a></td></tr><tr><td valign="top"><a href="#sort_array-2">sort_array/2</a></td><td></td></tr><tr><td valign="top"><a href="#sort_key-1">sort_key/1</a></td><td>Convert the unicode string to the sort key.</td></tr><tr><td valign="top"><a href="#sort_key-2">sort_key/2</a></td><td></td></tr></table>
280 | 
281 | 
282 | <a name="functions"></a>
283 | 
284 | ##Function Details##
285 | 
286 | <a name="compare-2"></a>
287 | 
288 | ###compare/2##
289 | 
290 | 
291 | 
292 | 
293 | <pre>compare(S1::string(), S2::string()) -> <a href="#type-uca_compare_result">uca_compare_result()</a></pre>
294 | <br></br>
295 | 
296 | 
297 | 
298 | 
299 | Compare two strings and return: lower, greater or equal.<a name="compare-3"></a>
300 | 
301 | ###compare/3##
302 | 
303 | 
304 | 
305 | 
306 | <pre>compare(Uca_options::#uca_options{}, S1::string(), S2::string()) -> <a href="#type-uca_compare_result">uca_compare_result()</a></pre>
307 | <br></br>
308 | 
309 | 
310 | <a name="search-2"></a>
311 | 
312 | ###search/2##
313 | 
314 | 
315 | 
316 | 
317 | <pre>search(Target::string(), Pattern::string()) -> <a href="#type-search_result">search_result()</a></pre>
318 | <br></br>
319 | 
320 | 
321 | <a name="search-3"></a>
322 | 
323 | ###search/3##
324 | 
325 | 
326 | 
327 | 
328 | <pre>search(Target::string(), Pattern::string(), MatchStyle::atom()) -> <a href="#type-search_result">search_result()</a></pre>
329 | <br></br>
330 | 
331 | 
332 | <a name="search-4"></a>
333 | 
334 | ###search/4##
335 | 
336 | 
337 | 
338 | 
339 | <pre>search(Uca_options::#uca_options{}, Target::string(), Pattern::string(), MatchStyle::atom()) -> <a href="#type-search_result">search_result()</a></pre>
340 | <br></br>
341 | 
342 | 
343 | <a name="sort-1"></a>
344 | 
345 | ###sort/1##
346 | 
347 | 
348 | 
349 | 
350 | <pre>sort(Strings::[string()]) -&gt; [string()]</pre>
351 | <br></br>
352 | 
353 | 
354 | 
355 | 
356 | Sort a list of strings.<a name="sort-2"></a>
357 | 
358 | ###sort/2##
359 | 
360 | 
361 | 
362 | 
363 | <pre>sort(Uca_options::#uca_options{}, Strings::[string()]) -&gt; [string()]</pre>
364 | <br></br>
365 | 
366 | 
367 | 
368 | 
369 | Sort a list of strings.<a name="sort_array-1"></a>
370 | 
371 | ###sort_array/1##
372 | 
373 | 
374 | 
375 | 
376 | `sort_array(S) -> any()`
377 | 
378 | 
379 | 
380 | Convert the unicode string to the
381 | [collation element array](http://unicode.org/reports/tr10/#Step_2)<a name="sort_array-2"></a>
382 | 
383 | ###sort_array/2##
384 | 
385 | 
386 | 
387 | 
388 | `sort_array(C, S) -> any()`
389 | 
390 | <a name="sort_key-1"></a>
391 | 
392 | ###sort_key/1##
393 | 
394 | 
395 | 
396 | 
397 | `sort_key(S) -> any()`
398 | 
399 | 
400 | 
401 | Convert the unicode string to the sort key.<a name="sort_key-2"></a>
402 | 
403 | ###sort_key/2##
404 | 
405 | 
406 | 
407 | 
408 | `sort_key(C, S) -> any()`
409 | 
410 | 


--------------------------------------------------------------------------------
/doc/ux_wb.md:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | #Module ux_wb#
 4 | * [Description](#description)
 5 | * [Function Index](#index)
 6 | * [Function Details](#functions)
 7 | 
 8 | 
 9 | Default Word Cluster Boundary Breaker.
10 | 
11 | <a name="description"></a>
12 | 
13 | ##Description##
14 | 
15 | 
16 | [UTR29: Word Boundaries](http://unicode.org/reports/tr29/#Word_Boundaries)
17 | <a name="index"></a>
18 | 
19 | ##Function Index##
20 | 
21 | 
22 | <table width="100%" border="1" cellspacing="0" cellpadding="2" summary="function index"><tr><td valign="top"><a href="#split-1">split/1</a></td><td></td></tr><tr><td valign="top"><a href="#words-1">words/1</a></td><td></td></tr></table>
23 | 
24 | 
25 | <a name="functions"></a>
26 | 
27 | ##Function Details##
28 | 
29 | <a name="split-1"></a>
30 | 
31 | ###split/1##
32 | 
33 | 
34 | 
35 | 
36 | `split(S) -> any()`
37 | 
38 | <a name="words-1"></a>
39 | 
40 | ###words/1##
41 | 
42 | 
43 | 
44 | 
45 | `words(S) -> any()`
46 | 
47 | 


--------------------------------------------------------------------------------
/priv/UCA/CollationAuxiliary/allkeys_CLDR.txt.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/erlang-unicode/ux/0ccdba1f1acb1545e32693215d5b2a50879e217e/priv/UCA/CollationAuxiliary/allkeys_CLDR.txt.gz


--------------------------------------------------------------------------------
/priv/UCA/allkeys.txt.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/erlang-unicode/ux/0ccdba1f1acb1545e32693215d5b2a50879e217e/priv/UCA/allkeys.txt.gz


--------------------------------------------------------------------------------
/priv/UNIDATA/Blocks.txt:
--------------------------------------------------------------------------------
  1 | # Blocks-6.1.0.txt
  2 | # Date: 2011-06-14, 18:26:00 GMT [KW, LI]
  3 | #
  4 | # Unicode Character Database
  5 | # Copyright (c) 1991-2011 Unicode, Inc.
  6 | # For terms of use, see http://www.unicode.org/terms_of_use.html
  7 | # For documentation, see http://www.unicode.org/reports/tr44/
  8 | #
  9 | # Note:   The casing of block names is not normative.
 10 | #         For example, "Basic Latin" and "BASIC LATIN" are equivalent.
 11 | #
 12 | # Format:
 13 | # Start Code..End Code; Block Name
 14 | 
 15 | # ================================================
 16 | 
 17 | # Note:   When comparing block names, casing, whitespace, hyphens,
 18 | #         and underbars are ignored.
 19 | #         For example, "Latin Extended-A" and "latin extended a" are equivalent.
 20 | #         For more information on the comparison of property values, 
 21 | #            see UAX #44: http://www.unicode.org/reports/tr44/
 22 | #
 23 | #  All code points not explicitly listed for Block
 24 | #  have the value No_Block.
 25 | 
 26 | # Property:	Block
 27 | #
 28 | # @missing: 0000..10FFFF; No_Block
 29 | 
 30 | 0000..007F; Basic Latin
 31 | 0080..00FF; Latin-1 Supplement
 32 | 0100..017F; Latin Extended-A
 33 | 0180..024F; Latin Extended-B
 34 | 0250..02AF; IPA Extensions
 35 | 02B0..02FF; Spacing Modifier Letters
 36 | 0300..036F; Combining Diacritical Marks
 37 | 0370..03FF; Greek and Coptic
 38 | 0400..04FF; Cyrillic
 39 | 0500..052F; Cyrillic Supplement
 40 | 0530..058F; Armenian
 41 | 0590..05FF; Hebrew
 42 | 0600..06FF; Arabic
 43 | 0700..074F; Syriac
 44 | 0750..077F; Arabic Supplement
 45 | 0780..07BF; Thaana
 46 | 07C0..07FF; NKo
 47 | 0800..083F; Samaritan
 48 | 0840..085F; Mandaic
 49 | 08A0..08FF; Arabic Extended-A
 50 | 0900..097F; Devanagari
 51 | 0980..09FF; Bengali
 52 | 0A00..0A7F; Gurmukhi
 53 | 0A80..0AFF; Gujarati
 54 | 0B00..0B7F; Oriya
 55 | 0B80..0BFF; Tamil
 56 | 0C00..0C7F; Telugu
 57 | 0C80..0CFF; Kannada
 58 | 0D00..0D7F; Malayalam
 59 | 0D80..0DFF; Sinhala
 60 | 0E00..0E7F; Thai
 61 | 0E80..0EFF; Lao
 62 | 0F00..0FFF; Tibetan
 63 | 1000..109F; Myanmar
 64 | 10A0..10FF; Georgian
 65 | 1100..11FF; Hangul Jamo
 66 | 1200..137F; Ethiopic
 67 | 1380..139F; Ethiopic Supplement
 68 | 13A0..13FF; Cherokee
 69 | 1400..167F; Unified Canadian Aboriginal Syllabics
 70 | 1680..169F; Ogham
 71 | 16A0..16FF; Runic
 72 | 1700..171F; Tagalog
 73 | 1720..173F; Hanunoo
 74 | 1740..175F; Buhid
 75 | 1760..177F; Tagbanwa
 76 | 1780..17FF; Khmer
 77 | 1800..18AF; Mongolian
 78 | 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
 79 | 1900..194F; Limbu
 80 | 1950..197F; Tai Le
 81 | 1980..19DF; New Tai Lue
 82 | 19E0..19FF; Khmer Symbols
 83 | 1A00..1A1F; Buginese
 84 | 1A20..1AAF; Tai Tham
 85 | 1B00..1B7F; Balinese
 86 | 1B80..1BBF; Sundanese
 87 | 1BC0..1BFF; Batak
 88 | 1C00..1C4F; Lepcha
 89 | 1C50..1C7F; Ol Chiki
 90 | 1CC0..1CCF; Sundanese Supplement
 91 | 1CD0..1CFF; Vedic Extensions
 92 | 1D00..1D7F; Phonetic Extensions
 93 | 1D80..1DBF; Phonetic Extensions Supplement
 94 | 1DC0..1DFF; Combining Diacritical Marks Supplement
 95 | 1E00..1EFF; Latin Extended Additional
 96 | 1F00..1FFF; Greek Extended
 97 | 2000..206F; General Punctuation
 98 | 2070..209F; Superscripts and Subscripts
 99 | 20A0..20CF; Currency Symbols
100 | 20D0..20FF; Combining Diacritical Marks for Symbols
101 | 2100..214F; Letterlike Symbols
102 | 2150..218F; Number Forms
103 | 2190..21FF; Arrows
104 | 2200..22FF; Mathematical Operators
105 | 2300..23FF; Miscellaneous Technical
106 | 2400..243F; Control Pictures
107 | 2440..245F; Optical Character Recognition
108 | 2460..24FF; Enclosed Alphanumerics
109 | 2500..257F; Box Drawing
110 | 2580..259F; Block Elements
111 | 25A0..25FF; Geometric Shapes
112 | 2600..26FF; Miscellaneous Symbols
113 | 2700..27BF; Dingbats
114 | 27C0..27EF; Miscellaneous Mathematical Symbols-A
115 | 27F0..27FF; Supplemental Arrows-A
116 | 2800..28FF; Braille Patterns
117 | 2900..297F; Supplemental Arrows-B
118 | 2980..29FF; Miscellaneous Mathematical Symbols-B
119 | 2A00..2AFF; Supplemental Mathematical Operators
120 | 2B00..2BFF; Miscellaneous Symbols and Arrows
121 | 2C00..2C5F; Glagolitic
122 | 2C60..2C7F; Latin Extended-C
123 | 2C80..2CFF; Coptic
124 | 2D00..2D2F; Georgian Supplement
125 | 2D30..2D7F; Tifinagh
126 | 2D80..2DDF; Ethiopic Extended
127 | 2DE0..2DFF; Cyrillic Extended-A
128 | 2E00..2E7F; Supplemental Punctuation
129 | 2E80..2EFF; CJK Radicals Supplement
130 | 2F00..2FDF; Kangxi Radicals
131 | 2FF0..2FFF; Ideographic Description Characters
132 | 3000..303F; CJK Symbols and Punctuation
133 | 3040..309F; Hiragana
134 | 30A0..30FF; Katakana
135 | 3100..312F; Bopomofo
136 | 3130..318F; Hangul Compatibility Jamo
137 | 3190..319F; Kanbun
138 | 31A0..31BF; Bopomofo Extended
139 | 31C0..31EF; CJK Strokes
140 | 31F0..31FF; Katakana Phonetic Extensions
141 | 3200..32FF; Enclosed CJK Letters and Months
142 | 3300..33FF; CJK Compatibility
143 | 3400..4DBF; CJK Unified Ideographs Extension A
144 | 4DC0..4DFF; Yijing Hexagram Symbols
145 | 4E00..9FFF; CJK Unified Ideographs
146 | A000..A48F; Yi Syllables
147 | A490..A4CF; Yi Radicals
148 | A4D0..A4FF; Lisu
149 | A500..A63F; Vai
150 | A640..A69F; Cyrillic Extended-B
151 | A6A0..A6FF; Bamum
152 | A700..A71F; Modifier Tone Letters
153 | A720..A7FF; Latin Extended-D
154 | A800..A82F; Syloti Nagri
155 | A830..A83F; Common Indic Number Forms
156 | A840..A87F; Phags-pa
157 | A880..A8DF; Saurashtra
158 | A8E0..A8FF; Devanagari Extended
159 | A900..A92F; Kayah Li
160 | A930..A95F; Rejang
161 | A960..A97F; Hangul Jamo Extended-A
162 | A980..A9DF; Javanese
163 | AA00..AA5F; Cham
164 | AA60..AA7F; Myanmar Extended-A
165 | AA80..AADF; Tai Viet
166 | AAE0..AAFF; Meetei Mayek Extensions
167 | AB00..AB2F; Ethiopic Extended-A
168 | ABC0..ABFF; Meetei Mayek
169 | AC00..D7AF; Hangul Syllables
170 | D7B0..D7FF; Hangul Jamo Extended-B
171 | D800..DB7F; High Surrogates
172 | DB80..DBFF; High Private Use Surrogates
173 | DC00..DFFF; Low Surrogates
174 | E000..F8FF; Private Use Area
175 | F900..FAFF; CJK Compatibility Ideographs
176 | FB00..FB4F; Alphabetic Presentation Forms
177 | FB50..FDFF; Arabic Presentation Forms-A
178 | FE00..FE0F; Variation Selectors
179 | FE10..FE1F; Vertical Forms
180 | FE20..FE2F; Combining Half Marks
181 | FE30..FE4F; CJK Compatibility Forms
182 | FE50..FE6F; Small Form Variants
183 | FE70..FEFF; Arabic Presentation Forms-B
184 | FF00..FFEF; Halfwidth and Fullwidth Forms
185 | FFF0..FFFF; Specials
186 | 10000..1007F; Linear B Syllabary
187 | 10080..100FF; Linear B Ideograms
188 | 10100..1013F; Aegean Numbers
189 | 10140..1018F; Ancient Greek Numbers
190 | 10190..101CF; Ancient Symbols
191 | 101D0..101FF; Phaistos Disc
192 | 10280..1029F; Lycian
193 | 102A0..102DF; Carian
194 | 10300..1032F; Old Italic
195 | 10330..1034F; Gothic
196 | 10380..1039F; Ugaritic
197 | 103A0..103DF; Old Persian
198 | 10400..1044F; Deseret
199 | 10450..1047F; Shavian
200 | 10480..104AF; Osmanya
201 | 10800..1083F; Cypriot Syllabary
202 | 10840..1085F; Imperial Aramaic
203 | 10900..1091F; Phoenician
204 | 10920..1093F; Lydian
205 | 10980..1099F; Meroitic Hieroglyphs
206 | 109A0..109FF; Meroitic Cursive
207 | 10A00..10A5F; Kharoshthi
208 | 10A60..10A7F; Old South Arabian
209 | 10B00..10B3F; Avestan
210 | 10B40..10B5F; Inscriptional Parthian
211 | 10B60..10B7F; Inscriptional Pahlavi
212 | 10C00..10C4F; Old Turkic
213 | 10E60..10E7F; Rumi Numeral Symbols
214 | 11000..1107F; Brahmi
215 | 11080..110CF; Kaithi
216 | 110D0..110FF; Sora Sompeng
217 | 11100..1114F; Chakma
218 | 11180..111DF; Sharada
219 | 11680..116CF; Takri
220 | 12000..123FF; Cuneiform
221 | 12400..1247F; Cuneiform Numbers and Punctuation
222 | 13000..1342F; Egyptian Hieroglyphs
223 | 16800..16A3F; Bamum Supplement
224 | 16F00..16F9F; Miao
225 | 1B000..1B0FF; Kana Supplement
226 | 1D000..1D0FF; Byzantine Musical Symbols
227 | 1D100..1D1FF; Musical Symbols
228 | 1D200..1D24F; Ancient Greek Musical Notation
229 | 1D300..1D35F; Tai Xuan Jing Symbols
230 | 1D360..1D37F; Counting Rod Numerals
231 | 1D400..1D7FF; Mathematical Alphanumeric Symbols
232 | 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
233 | 1F000..1F02F; Mahjong Tiles
234 | 1F030..1F09F; Domino Tiles
235 | 1F0A0..1F0FF; Playing Cards
236 | 1F100..1F1FF; Enclosed Alphanumeric Supplement
237 | 1F200..1F2FF; Enclosed Ideographic Supplement
238 | 1F300..1F5FF; Miscellaneous Symbols And Pictographs
239 | 1F600..1F64F; Emoticons
240 | 1F680..1F6FF; Transport And Map Symbols
241 | 1F700..1F77F; Alchemical Symbols
242 | 20000..2A6DF; CJK Unified Ideographs Extension B
243 | 2A700..2B73F; CJK Unified Ideographs Extension C
244 | 2B740..2B81F; CJK Unified Ideographs Extension D
245 | 2F800..2FA1F; CJK Compatibility Ideographs Supplement
246 | E0000..E007F; Tags
247 | E0100..E01EF; Variation Selectors Supplement
248 | F0000..FFFFF; Supplementary Private Use Area-A
249 | 100000..10FFFF; Supplementary Private Use Area-B
250 | 
251 | # EOF


--------------------------------------------------------------------------------
/priv/UNIDATA/CompositionExclusions.txt:
--------------------------------------------------------------------------------
  1 | # CompositionExclusions-6.1.0.txt
  2 | # Date: 2011-07-12, 00:13:00 GMT [KW, LI]
  3 | #
  4 | # This file lists the characters for the Composition Exclusion Table
  5 | # defined in UAX #15, Unicode Normalization Forms.
  6 | #
  7 | # This file is a normative contributory data file in the
  8 | # Unicode Character Database.
  9 | #
 10 | # Copyright (c) 1991-2011 Unicode, Inc.
 11 | # For terms of use, see http://www.unicode.org/terms_of_use.html
 12 | #
 13 | # For more information, see
 14 | # http://www.unicode.org/unicode/reports/tr15/#Primary_Exclusion_List_Table
 15 | #
 16 | # For a full derivation of composition exclusions, see the derived property
 17 | # Full_Composition_Exclusion in DerivedNormalizationProps.txt
 18 | #
 19 | 
 20 | # ================================================
 21 | # (1) Script Specifics
 22 | #
 23 | # This list of characters cannot be derived from the UnicodeData.txt file.
 24 | # ================================================
 25 | 
 26 | 0958    #  DEVANAGARI LETTER QA
 27 | 0959    #  DEVANAGARI LETTER KHHA
 28 | 095A    #  DEVANAGARI LETTER GHHA
 29 | 095B    #  DEVANAGARI LETTER ZA
 30 | 095C    #  DEVANAGARI LETTER DDDHA
 31 | 095D    #  DEVANAGARI LETTER RHA
 32 | 095E    #  DEVANAGARI LETTER FA
 33 | 095F    #  DEVANAGARI LETTER YYA
 34 | 09DC    #  BENGALI LETTER RRA
 35 | 09DD    #  BENGALI LETTER RHA
 36 | 09DF    #  BENGALI LETTER YYA
 37 | 0A33    #  GURMUKHI LETTER LLA
 38 | 0A36    #  GURMUKHI LETTER SHA
 39 | 0A59    #  GURMUKHI LETTER KHHA
 40 | 0A5A    #  GURMUKHI LETTER GHHA
 41 | 0A5B    #  GURMUKHI LETTER ZA
 42 | 0A5E    #  GURMUKHI LETTER FA
 43 | 0B5C    #  ORIYA LETTER RRA
 44 | 0B5D    #  ORIYA LETTER RHA
 45 | 0F43    #  TIBETAN LETTER GHA
 46 | 0F4D    #  TIBETAN LETTER DDHA
 47 | 0F52    #  TIBETAN LETTER DHA
 48 | 0F57    #  TIBETAN LETTER BHA
 49 | 0F5C    #  TIBETAN LETTER DZHA
 50 | 0F69    #  TIBETAN LETTER KSSA
 51 | 0F76    #  TIBETAN VOWEL SIGN VOCALIC R
 52 | 0F78    #  TIBETAN VOWEL SIGN VOCALIC L
 53 | 0F93    #  TIBETAN SUBJOINED LETTER GHA
 54 | 0F9D    #  TIBETAN SUBJOINED LETTER DDHA
 55 | 0FA2    #  TIBETAN SUBJOINED LETTER DHA
 56 | 0FA7    #  TIBETAN SUBJOINED LETTER BHA
 57 | 0FAC    #  TIBETAN SUBJOINED LETTER DZHA
 58 | 0FB9    #  TIBETAN SUBJOINED LETTER KSSA
 59 | FB1D    #  HEBREW LETTER YOD WITH HIRIQ
 60 | FB1F    #  HEBREW LIGATURE YIDDISH YOD YOD PATAH
 61 | FB2A    #  HEBREW LETTER SHIN WITH SHIN DOT
 62 | FB2B    #  HEBREW LETTER SHIN WITH SIN DOT
 63 | FB2C    #  HEBREW LETTER SHIN WITH DAGESH AND SHIN DOT
 64 | FB2D    #  HEBREW LETTER SHIN WITH DAGESH AND SIN DOT
 65 | FB2E    #  HEBREW LETTER ALEF WITH PATAH
 66 | FB2F    #  HEBREW LETTER ALEF WITH QAMATS
 67 | FB30    #  HEBREW LETTER ALEF WITH MAPIQ
 68 | FB31    #  HEBREW LETTER BET WITH DAGESH
 69 | FB32    #  HEBREW LETTER GIMEL WITH DAGESH
 70 | FB33    #  HEBREW LETTER DALET WITH DAGESH
 71 | FB34    #  HEBREW LETTER HE WITH MAPIQ
 72 | FB35    #  HEBREW LETTER VAV WITH DAGESH
 73 | FB36    #  HEBREW LETTER ZAYIN WITH DAGESH
 74 | FB38    #  HEBREW LETTER TET WITH DAGESH
 75 | FB39    #  HEBREW LETTER YOD WITH DAGESH
 76 | FB3A    #  HEBREW LETTER FINAL KAF WITH DAGESH
 77 | FB3B    #  HEBREW LETTER KAF WITH DAGESH
 78 | FB3C    #  HEBREW LETTER LAMED WITH DAGESH
 79 | FB3E    #  HEBREW LETTER MEM WITH DAGESH
 80 | FB40    #  HEBREW LETTER NUN WITH DAGESH
 81 | FB41    #  HEBREW LETTER SAMEKH WITH DAGESH
 82 | FB43    #  HEBREW LETTER FINAL PE WITH DAGESH
 83 | FB44    #  HEBREW LETTER PE WITH DAGESH
 84 | FB46    #  HEBREW LETTER TSADI WITH DAGESH
 85 | FB47    #  HEBREW LETTER QOF WITH DAGESH
 86 | FB48    #  HEBREW LETTER RESH WITH DAGESH
 87 | FB49    #  HEBREW LETTER SHIN WITH DAGESH
 88 | FB4A    #  HEBREW LETTER TAV WITH DAGESH
 89 | FB4B    #  HEBREW LETTER VAV WITH HOLAM
 90 | FB4C    #  HEBREW LETTER BET WITH RAFE
 91 | FB4D    #  HEBREW LETTER KAF WITH RAFE
 92 | FB4E    #  HEBREW LETTER PE WITH RAFE
 93 | 
 94 | # Total code points: 67
 95 | 
 96 | # ================================================
 97 | # (2) Post Composition Version precomposed characters
 98 | #
 99 | # These characters cannot be derived solely from the UnicodeData.txt file
100 | # in this version of Unicode.
101 | #
102 | # Note that characters added to the standard after the
103 | # Composition Version and which have canonical decomposition mappings
104 | # are not automatically added to this list of Post Composition
105 | # Version precomposed characters.
106 | # ================================================
107 | 
108 | 2ADC    #  FORKING
109 | 1D15E   #  MUSICAL SYMBOL HALF NOTE
110 | 1D15F   #  MUSICAL SYMBOL QUARTER NOTE
111 | 1D160   #  MUSICAL SYMBOL EIGHTH NOTE
112 | 1D161   #  MUSICAL SYMBOL SIXTEENTH NOTE
113 | 1D162   #  MUSICAL SYMBOL THIRTY-SECOND NOTE
114 | 1D163   #  MUSICAL SYMBOL SIXTY-FOURTH NOTE
115 | 1D164   #  MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE
116 | 1D1BB   #  MUSICAL SYMBOL MINIMA
117 | 1D1BC   #  MUSICAL SYMBOL MINIMA BLACK
118 | 1D1BD   #  MUSICAL SYMBOL SEMIMINIMA WHITE
119 | 1D1BE   #  MUSICAL SYMBOL SEMIMINIMA BLACK
120 | 1D1BF   #  MUSICAL SYMBOL FUSA WHITE
121 | 1D1C0   #  MUSICAL SYMBOL FUSA BLACK
122 | 
123 | # Total code points: 14
124 | 
125 | # ================================================
126 | # (3) Singleton Decompositions
127 | #
128 | # These characters can be derived from the UnicodeData.txt file
129 | # by including all canonically decomposable characters whose
130 | # canonical decomposition consists of a single character.
131 | #
132 | # These characters are simply quoted here for reference.
133 | # See also Full_Composition_Exclusion in DerivedNormalizationProps.txt
134 | # ================================================
135 | 
136 | # 0340..0341       [2] COMBINING GRAVE TONE MARK..COMBINING ACUTE TONE MARK
137 | # 0343                 COMBINING GREEK KORONIS
138 | # 0374                 GREEK NUMERAL SIGN
139 | # 037E                 GREEK QUESTION MARK
140 | # 0387                 GREEK ANO TELEIA
141 | # 1F71                 GREEK SMALL LETTER ALPHA WITH OXIA
142 | # 1F73                 GREEK SMALL LETTER EPSILON WITH OXIA
143 | # 1F75                 GREEK SMALL LETTER ETA WITH OXIA
144 | # 1F77                 GREEK SMALL LETTER IOTA WITH OXIA
145 | # 1F79                 GREEK SMALL LETTER OMICRON WITH OXIA
146 | # 1F7B                 GREEK SMALL LETTER UPSILON WITH OXIA
147 | # 1F7D                 GREEK SMALL LETTER OMEGA WITH OXIA
148 | # 1FBB                 GREEK CAPITAL LETTER ALPHA WITH OXIA
149 | # 1FBE                 GREEK PROSGEGRAMMENI
150 | # 1FC9                 GREEK CAPITAL LETTER EPSILON WITH OXIA
151 | # 1FCB                 GREEK CAPITAL LETTER ETA WITH OXIA
152 | # 1FD3                 GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
153 | # 1FDB                 GREEK CAPITAL LETTER IOTA WITH OXIA
154 | # 1FE3                 GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
155 | # 1FEB                 GREEK CAPITAL LETTER UPSILON WITH OXIA
156 | # 1FEE..1FEF       [2] GREEK DIALYTIKA AND OXIA..GREEK VARIA
157 | # 1FF9                 GREEK CAPITAL LETTER OMICRON WITH OXIA
158 | # 1FFB                 GREEK CAPITAL LETTER OMEGA WITH OXIA
159 | # 1FFD                 GREEK OXIA
160 | # 2000..2001       [2] EN QUAD..EM QUAD
161 | # 2126                 OHM SIGN
162 | # 212A..212B       [2] KELVIN SIGN..ANGSTROM SIGN
163 | # 2329                 LEFT-POINTING ANGLE BRACKET
164 | # 232A                 RIGHT-POINTING ANGLE BRACKET
165 | # F900..FA0D     [270] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA0D
166 | # FA10                 CJK COMPATIBILITY IDEOGRAPH-FA10
167 | # FA12                 CJK COMPATIBILITY IDEOGRAPH-FA12
168 | # FA15..FA1E      [10] CJK COMPATIBILITY IDEOGRAPH-FA15..CJK COMPATIBILITY IDEOGRAPH-FA1E
169 | # FA20                 CJK COMPATIBILITY IDEOGRAPH-FA20
170 | # FA22                 CJK COMPATIBILITY IDEOGRAPH-FA22
171 | # FA25..FA26       [2] CJK COMPATIBILITY IDEOGRAPH-FA25..CJK COMPATIBILITY IDEOGRAPH-FA26
172 | # FA2A..FA6D      [68] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA6D
173 | # FA70..FAD9     [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9
174 | # 2F800..2FA1D   [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
175 | 
176 | # Total code points: 1035
177 | 
178 | # ================================================
179 | # (4) Non-Starter Decompositions
180 | #
181 | # These characters can be derived from the UnicodeData.txt file
182 | # by including each expanding canonical decomposition
183 | # (i.e., those which canonically decompose to a sequence
184 | # of characters instead of a single character), such that:
185 | #
186 | # A. The character is not a Starter.
187 | #
188 | # OR (inclusive)
189 | #
190 | # B. The character's canonical decomposition begins
191 | # with a character that is not a Starter.
192 | #
193 | # Note that a "Starter" is any character with a zero combining class.
194 | #
195 | # These characters are simply quoted here for reference.
196 | # See also Full_Composition_Exclusion in DerivedNormalizationProps.txt
197 | # ================================================
198 | 
199 | # 0344                 COMBINING GREEK DIALYTIKA TONOS
200 | # 0F73                 TIBETAN VOWEL SIGN II
201 | # 0F75                 TIBETAN VOWEL SIGN UU
202 | # 0F81                 TIBETAN VOWEL SIGN REVERSED II
203 | 
204 | # Total code points: 4
205 | 
206 | 


--------------------------------------------------------------------------------
/priv/UNIDATA/DerivedNormalizationProps.txt.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/erlang-unicode/ux/0ccdba1f1acb1545e32693215d5b2a50879e217e/priv/UNIDATA/DerivedNormalizationProps.txt.gz


--------------------------------------------------------------------------------
/priv/UNIDATA/UnicodeData.txt.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/erlang-unicode/ux/0ccdba1f1acb1545e32693215d5b2a50879e217e/priv/UNIDATA/UnicodeData.txt.gz


--------------------------------------------------------------------------------
/priv/UNIDATA/auxiliary/GraphemeBreakProperty.txt.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/erlang-unicode/ux/0ccdba1f1acb1545e32693215d5b2a50879e217e/priv/UNIDATA/auxiliary/GraphemeBreakProperty.txt.gz


--------------------------------------------------------------------------------
/priv/UNIDATA/auxiliary/GraphemeBreakTest.txt.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/erlang-unicode/ux/0ccdba1f1acb1545e32693215d5b2a50879e217e/priv/UNIDATA/auxiliary/GraphemeBreakTest.txt.gz


--------------------------------------------------------------------------------
/priv/UNIDATA/auxiliary/WordBreakProperty.txt.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/erlang-unicode/ux/0ccdba1f1acb1545e32693215d5b2a50879e217e/priv/UNIDATA/auxiliary/WordBreakProperty.txt.gz


--------------------------------------------------------------------------------
/priv/UNIDATA/auxiliary/WordBreakTest.txt.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/erlang-unicode/ux/0ccdba1f1acb1545e32693215d5b2a50879e217e/priv/UNIDATA/auxiliary/WordBreakTest.txt.gz


--------------------------------------------------------------------------------
/rebar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/erlang-unicode/ux/0ccdba1f1acb1545e32693215d5b2a50879e217e/rebar


--------------------------------------------------------------------------------
/rebar.config:
--------------------------------------------------------------------------------
 1 | %% -*- erlang -*-
 2 | {erl_opts, [
 3 | %   {d, 'UNIDATA_DEBUG'},
 4 |     {d, 'SLOW_TESTS'},
 5 |     debug_info]}.
 6 | {deps, [
 7 | %% We need the reloader from Mochiweb
 8 | % {mochiweb, ".*",
 9 | %  {git, "git://github.com/mochi/mochiweb.git", "master"}},
10 | 
11 | %% Comment to run doc generation for github:
12 | %  {edown, ".*", {git, "git://github.com/esl/edown.git", "master"}},
13 | 
14 |     {metamodule, ".*",
15 |         {git, "git://github.com/arcusfelis/metamodule.git", "master"}}
16 | ]}.
17 | {cover_enabled, true}.
18 | {eunit_opts, [verbose, {report,{eunit_surefire,[{dir,"."}]}}]}.
19 | {edoc_opts, [{doclet, edown_doclet},
20 |             {src_path, ["src/"]},
21 |             {subpackages, true}]}.
22 | 
23 | 


--------------------------------------------------------------------------------
/src/uca/ux.hrl:
--------------------------------------------------------------------------------
1 | -include("../ux.hrl").
2 | 


--------------------------------------------------------------------------------
/src/uca/ux_uca.hrl:
--------------------------------------------------------------------------------
 1 | % vim: set filetype=erlang shiftwidth=4 tabstop=4 expandtab tw=80:
 2 | %%% =====================================================================
 3 | %%% This library is free software; you can redistribute it and/or modify
 4 | %%% it under the terms of the GNU Lesser General Public License as
 5 | %%% published by the Free Software Foundation; either version 2 of the
 6 | %%% License, or (at your option) any later version.
 7 | %%%
 8 | %%% This library is distributed in the hope that it will be useful, but
 9 | %%% WITHOUT ANY WARRANTY; without even the implied warranty of
10 | %%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 | %%% Lesser General Public License for more details.
12 | %%%
13 | %%% You should have received a copy of the GNU Lesser General Public
14 | %%% License along with this library; if not, write to the Free Software
15 | %%% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
16 | %%% USA
17 | %%%
18 | %%% $Id$
19 | %%%
20 | %%% @copyright 2010-2011 Michael Uvarov
21 | %%% @author Michael Uvarov <arcusfelis@gmail.com>
22 | %%% @see ux
23 | %%% @end
24 | %%% =====================================================================
25 | 
26 | % TERMINATOR < T < V < L
27 | -define(COL_HANGUL_TERMINATOR, 6139). 
28 | 
29 | % Records: collation options; every field carries its default inline.
30 | -record(uca_options, {
31 |     % Generator options
32 |     hangul_terminator = ?COL_HANGUL_TERMINATOR :: ux_uca:uca_weight(),
33 |     natural_sort = true :: boolean(),
34 |     strength = 4 :: ux_uca:uca_strength(),
35 |     alternate = shifted :: ux_uca:uca_alternate(),
36 |     case_sensitive = false :: boolean(), % move L3 to L1 if true
37 |     case_first = lower :: ux_uca:uca_case_first(), 
38 |     backwards = false :: boolean(),
39 | 
40 |     % Other options
41 |     sort_key_format = binary :: ux_uca:uca_sort_key_format()
42 |     }).
43 | 
44 | 
45 | 
46 | 
47 | 
48 | 
49 | 
50 | 
51 | 
52 | 
53 | 
54 | %%
55 | %% Some constants
56 | %%
57 | 
58 | 
59 | 
60 | % Hangul & UCA
61 | -define(COL_HANGUL_LBASE, 12603). 
62 | -define(COL_HANGUL_VBASE, 12729). 
63 | -define(COL_HANGUL_TBASE, 12799). % 31FF
64 | 
65 | -define(COL_HANGUL_LLAST, (?COL_HANGUL_LBASE + ?HANGUL_LCOUNT)).
66 | -define(COL_HANGUL_VLAST, (?COL_HANGUL_VBASE + ?HANGUL_VCOUNT)).
67 | -define(COL_HANGUL_TLAST, 12850).
68 | 
69 | % Weight on level 1 (L1) is L1 of Hangul jamo L.
70 | -define(IS_L1_OF_HANGUL_L(W), (
71 |  (W>=?COL_HANGUL_LBASE) and (W=<?COL_HANGUL_LLAST)
72 | )).
73 | 
74 | % Weight on level 1 (L1) is L1 of Hangul jamo V.
75 | -define(IS_L1_OF_HANGUL_V(W), (
76 |  (W>=?COL_HANGUL_VBASE) and (W=<?COL_HANGUL_VLAST)
77 | )).
78 | 
79 | % Weight on level 1 (L1) is L1 of Hangul jamo T.
80 | -define(IS_L1_OF_HANGUL_T(W), (
81 |  (W>=?COL_HANGUL_TBASE) and (W=<?COL_HANGUL_TLAST)
82 | )).
83 | 
84 | 
85 | -define(COL_DECIMAL_START, 5578). % L1 of 0
86 | -define(COL_DECIMAL_END,   (?COL_DECIMAL_START+9)). % L1 of 9
87 | -define(IS_L1_OF_DECIMAL(W), (
88 |  (W>=?COL_DECIMAL_START) and (W=<?COL_DECIMAL_END)
89 | )).
90 | -define(COL_WEIGHT_TO_DECIMAL(W), (
91 |  W - ?COL_DECIMAL_START
92 | )).
93 | 
94 | 
95 | 


--------------------------------------------------------------------------------
/src/uca/ux_uca_alt.erl:
--------------------------------------------------------------------------------
  1 | %%% Convert bin weight to [int].
  2 | %%% @private
  3 | -module(ux_uca_alt).
  4 | -export([get_alternate_function/2]).
  5 | -include("ux_uca.hrl").
  6 | 
  7 | %% Pick the initial weight-handling fun for the configured `alternate' mode.
  8 | -spec get_alternate_function(#uca_options{}, fun()) -> fun().
  9 | get_alternate_function(#uca_options{alternate='shifted', strength=4}, D) ->
 10 |     % Shifted mode at strength 4 uses the level-4 COMMON value as L4 filler.
 11 |     ReassignL4 = D({reassign_function, 4}),
 12 |     shifted_weight(ReassignL4(get_common_value));
 13 | get_alternate_function(#uca_options{alternate=Alt}, _D) ->
 14 |     get_function(Alt).
 15 | 
 16 |     
 17 | %% Map the `alternate' option to the fun that handles the first
 18 | %% collation element. Each such fun takes one element and returns
 19 | %% {NextFun, Weights}, so the fun chosen here is only the start state.
 20 | %% `shifted' reaches this point only when strength < 4; strength 4 is
 21 | %% served by shifted_weight/1 in get_alternate_function/2.
 22 | -spec get_function(Alt :: ux_uca:uca_alternate()) -> fun().
 23 | get_function(shifted)       -> fun short_shifted_weight/1;
 24 | get_function(shift_trimmed) -> fun shift_trimmed_weight/1;
 25 | get_function(blanked)       -> fun blanked_weight/1;
 26 | get_function(non_ignorable) -> fun non_ignorable_weight/1.
 27 | 
 28 | %% Non-ignorable mode: keep every weight, strip only the leading flag.
 29 | non_ignorable_weight(Element) ->
 30 |     {fun non_ignorable_weight/1, weight(Element)}.
 31 | 
 32 | weight([_Flag | Weights]) -> Weights.
 33 | 
 34 | %% @private
 35 | %% Shifted mode at strength 4, state "not after a variable element".
 36 | %% Common is the L4 value given to ordinary, non-variable elements.
 37 | shifted_weight(Common) ->
 38 |     fun(['non_variable',0,0,0,_]) ->          % completely ignorable
 39 |         {shifted_weight(Common), []};
 40 |        (['variable',0,0,0,_]) ->              % same, but flagged variable
 41 |         {shifted_weight2(Common), []};
 42 |        (['variable',Primary|_]) ->            % variable: old L1 moves to L4
 43 |         {shifted_weight2(Common), [0, 0, 0, Primary]};
 44 |        ([_|_] = Element) ->                   % anything else keeps L1-L3
 45 |         {shifted_weight(Common), set_l4_to_value(Element, Common)}
 46 |     end.
 47 | 
 48 | 
 49 | %% @doc Counterpart of shifted_weight/1 used for the elements that come
 50 | %%      after a variable one.
 51 | %% @end
 52 | %% @private
 53 | % In this state every primary ignorable is dropped, whatever its L2/L3.
 54 | shifted_weight2(Common) ->
 55 |     fun([_Flag,0,_,_,_]) ->
 56 |         {shifted_weight2(Common), []};
 57 |        ([variable,Primary|_]) ->     % a further variable: old L1 -> L4
 58 |         {shifted_weight2(Common), [0, 0, 0, Primary]};
 59 |        ([_|_] = Element) ->          % anything else leaves this state
 60 |         Weights = set_l4_to_value(Element, Common),
 61 |         {shifted_weight(Common), Weights}
 62 |     end.
 63 | 
 64 | 
 65 | 
 66 | %% This implementation is faster.
 67 | %% It is used when strength < 4.
 68 | 
 69 | %% @private
 70 | % Shifted mode without L4: a variable element contributes no weights.
 71 | short_shifted_weight(['variable'|_]) ->
 72 |     {fun short_shifted_weight2/1, []};
 73 | % A completely ignorable element contributes nothing either.
 74 | short_shifted_weight(['non_variable',0,0,0|_]) ->
 75 |     {fun short_shifted_weight/1, []};
 76 | short_shifted_weight([_Flag|Weights]) ->
 77 |     {fun short_shifted_weight/1, Weights}.
 78 | 
 79 | 
 80 | %% @doc Counterpart of short_shifted_weight/1 used for the elements that
 81 | %%      come after a variable one.
 82 | %% @end
 83 | %% @private
 84 | % Further variable elements are dropped and keep this state.
 85 | short_shifted_weight2(['variable'|_]) ->
 86 |     {fun short_shifted_weight2/1, []};
 87 | % So is every primary ignorable, whatever its L2/L3 weights.
 88 | short_shifted_weight2([_Flag,0,_,_|_]) ->
 89 |     {fun short_shifted_weight2/1, []};
 90 | short_shifted_weight2([_Flag|Weights]) ->
 91 |     {fun short_shifted_weight/1, Weights}.
 92 | 
 93 | 
 94 | 
 95 | 
 96 | 
 97 | 
 98 | 
 99 | 
100 | 
101 | 
102 | %% @private
103 | %% Alternate=Blanked: a variable element contributes no weights at all.
104 | blanked_weight([variable|_]) ->
105 |     {fun blanked_weight2/1, []};
106 | blanked_weight([_Flag|Weights]) ->
107 |     {fun blanked_weight/1, Weights}.
108 | 
109 | %% @private  Blanked mode, state after a variable element.
110 | blanked_weight2(['variable'|_]) ->
111 |     {fun blanked_weight2/1, []};
112 | blanked_weight2([_Flag, 0|_]) ->
113 |     {fun blanked_weight2/1, []};
114 | blanked_weight2([_Flag|Weights]) ->
115 |     {fun blanked_weight/1, Weights}.
116 | 
117 | 
118 | %% @private
119 | % Not after a variable: only completely ignorable elements are dropped.
120 | shift_trimmed_weight([_Var,0,0,0|_]) ->
121 |     {fun shift_trimmed_weight/1, []};
122 | % A variable moves its old L1 to L4; any other element gets L4 = 0.
123 | shift_trimmed_weight([variable, L1|_]) ->
124 |     {fun shift_trimmed_weight2/1, [0, 0, 0, L1]};
125 | shift_trimmed_weight([_|_] = Value) ->
126 |     {fun shift_trimmed_weight/1, set_l4_to_value(Value, 0)}.
127 | 
128 | %% @doc Counterpart of shift_trimmed_weight/1 used for the elements that
129 | %%      come after a variable one.
130 | %% @end
131 | %% @private
132 | % Every primary ignorable is dropped in this state.
133 | shift_trimmed_weight2([_Flag,0|_]) ->
134 |     {fun shift_trimmed_weight2/1, []};
135 | % A further variable again moves its old L1 to L4.
136 | shift_trimmed_weight2([variable, Primary|_]) ->
137 |     {fun shift_trimmed_weight2/1, [0, 0, 0, Primary]};
138 | shift_trimmed_weight2([_|_] = Element) ->
139 |     {fun shift_trimmed_weight/1, set_l4_to_value(Element, 0)}.
140 | 
141 | 
142 | %% @private
143 | %% Drop the flag and replace L4: [Flag,L1,L2,L3,_] -> [L1,L2,L3,NewL4].
144 | set_l4_to_value([_Var, L1, L2, L3, _L4], NewL4) ->
145 |     [L1, L2, L3, NewL4].
146 | 


--------------------------------------------------------------------------------
/src/uca/ux_uca_compress.erl:
--------------------------------------------------------------------------------
  1 | -module(ux_uca_compress).
  2 | -export([reassign_fun/3]).
  3 | -include("ux.hrl").
  4 | %% Build the fun that compresses the weights of one level (1 - 4).
  5 | %% Level 1: the fun only serialises, {to_binary, Weights} -> binary().
  6 | %% Levels 2 - 4: it also answers get_common_value and remaps a weight list.
  7 | reassign_fun(_Lvl=1, _Min, Max) ->
  8 |     Len = get_bits_len(Max),
  9 |     fun({to_binary, W}) -> <<<<X:Len>> || X <- W>> end;
 10 | reassign_fun(Lvl, Min, OldMax) ->
 11 |     {Common, Max} = get_common_value(Lvl, OldMax),
 12 |     TopSize = Max - Common,
 13 |     BottomSize = Common - Min,
 14 |     NewMin = 1,
 15 |     NewMax = get_new_max(Max),
 16 |     MaxBottom = NewMax - TopSize,
 17 |     MinTop = NewMin + BottomSize,
 18 |     GapSize = MaxBottom - MinTop,
 19 |     true = (GapSize > 0),
 20 |     % NOTE(review): this BOUND lies above MAXBOTTOM, not inside the gap.
 21 |     Bound = MaxBottom + (GapSize div 2),
 22 |     ?DBG(
 23 |         "~w:reassign_fun: Level ~w.~n"
 24 |         "   COMMON   is ~w. ~n" 
 25 |         "   MIN      is ~w. ~n" 
 26 |         "   MAX      is ~w. ~n" 
 27 |         "   BOUND    is ~w. ~n" 
 28 |         "   GAP_SIZE is ~w. ~n" 
 29 |         "   TOP_SIZE is ~w. ~n" 
 30 |         "   BOT_SIZE is ~w. ~n" 
 31 |         "   MAX_BOT  is ~w. ~n" 
 32 |         "   MIN_TOP  is ~w. ~n" 
 33 |         "   OLD_MAX  is ~w. ~n" 
 34 |         , 
 35 |         [?MODULE, Lvl,
 36 |          Common, Min, Max, Bound, 
 37 |          GapSize, TopSize, BottomSize,
 38 |          MaxBottom, MinTop, OldMax]),
 39 | 
 40 | %% Reassign the weights in the collation element table at level n to create
 41 | %% a gap of size GAP above COMMON. Typically for secondaries or tertiaries 
 42 | %% this is done after the values have been reduced to a byte range by the 
 43 | %% above methods. Here is a mapping that moves weights up or down to create 
 44 | %% a gap in a byte range.
 45 | %% w -> w + 01 - MIN, for MIN <= w < COMMON
 46 | %% w -> w + FF - MAX, for COMMON < w <= MAX
 47 |     RaFn = fun(W) when W < Common -> W + NewMin - Min;
 48 |               (W) when W > Common -> W + NewMax - Max
 49 |            end,
 50 | 
 51 | %% If a synthetic high weight would be less than BOUND, use a 
 52 | %% sequence of high weights of the form (BOUND)..(BOUND)(MAXBOTTOM - 
 53 | %% remainder).
 54 |     SynFn = fun(high, SynWeight, List) when SynWeight < Bound -> 
 55 |                 {Remainder, NewList} = do_seq(Bound, SynWeight, List),
 56 |                 [(MaxBottom - Remainder) | NewList];
 57 | 
 58 | %% If a synthetic low weight would not be less than BOUND, use a sequence 
 59 | %% of low weights of the form (BOUND-1)..(BOUND-1)(MINTOP + remainder) to 
 60 | %% express the length of the sequence.
 61 |                (low, SynWeight, List) -> 
 62 |                 {Remainder, NewList} = do_seq(Bound - 1, SynWeight, List),
 63 |                 [(MinTop + Remainder) | NewList]
 64 |             end,
 65 | %% NOTE(review): a `high' run of Bound or more values matches no clause.
 66 | %% When generating a sort key, look for maximal sequences of m COMMON values 
 67 | %% in a row. Let W be the weight right after the sequence.
 68 | %% do_reassign/5 performs that scan; Result is its empty accumulator.
 69 |     Result = [],
 70 | 
 71 | %% Remapped weights reach NewMax, so the field width must cover NewMax.
 72 |     Len = get_bits_len(NewMax),
 73 |     fun({to_binary, W}) -> <<<<X:Len>> || X <- W>>;
 74 |        (get_common_value) 
 75 |             -> Common; % for ux_uca_alt
 76 |        (Weights) -> do_reassign(Common, SynFn, RaFn, Weights, Result)
 77 |     end.
 78 |         
 79 | %% Push Val onto List while at least Val remains; return the leftover.
 80 | do_seq(Val, Rem, List) ->
 81 |     if Rem >= Val -> do_seq(Val, Rem - Val, [Val|List]);
 82 |        true       -> {Rem, List}
 83 |     end.
 84 | %% Bit width (8, 16 or 24) for a maximum Max; no branch above 16#FFFFFF.
 85 | get_bits_len(Max) ->
 86 |     if
 87 |     Max =< 16#FF     -> 8;
 88 |     Max =< 16#FFFF   -> 16;
 89 |     Max =< 16#FFFFFF -> 24 
 90 |     end.
 91 | 
 92 | %% @param W:[int()] List of weights on this lvl.
 93 | %% @param R:[int()] Reversed list of resulted weights.
 94 | %% @param Cnt:int() Count of repeated Commons.
 95 | %% The last step is a bit too simple, because the synthetic weights must 
 96 | %% not collide with other values having long strings of COMMON weights. 
 97 | %% This is done by using a sequence of synthetic weights, absorbing as 
 98 | %% much length into each one as possible. 
 99 | %% A value BOUND is defined between MINTOP and MAXBOTTOM. 
100 | %% The exact value for BOUND can be chosen based on the expected 
101 | %% frequency of synthetic low weights versus high weights for the 
102 | %% particular collation element table.
103 | 
104 | 
105 | %% Rewrite the weights of one level. For every maximal run of COMMON
106 | %% values SynFn pushes synthetic weights onto the accumulator; any other
107 | %% weight is remapped through RaFn. Acc is reversed once at the end.
108 | do_reassign(_Common, _SynFn, _RaFn, [], Acc) ->
109 |     lists:reverse(Acc);
110 | do_reassign(Common, SynFn, RaFn, [Common|Rest], Acc) ->
111 |     {RunLen, AfterRun} = do_common(Common, Rest, 1),
112 |     % The weight following the run selects a `high' or `low' synthetic.
113 |     Kind = syn_weight_type(AfterRun, Common),
114 |     NewAcc = SynFn(Kind, RunLen, Acc),
115 |     do_reassign(Common, SynFn, RaFn, AfterRun, NewAcc);
116 | do_reassign(Common, SynFn, RaFn, [Weight|Rest], Acc) ->
117 |     do_reassign(Common, SynFn, RaFn, Rest, [RaFn(Weight)|Acc]).
118 |     
119 | 
120 | %% The parameter is the tail of the string after the sequence.
121 | %%
122 | %% Let W be the weight right after the sequence. 
123 | %% If W < COMMON (or there is no W), replace the sequence by a synthetic 
124 | %%                low weight equal to (MINTOP + m).
125 | %% If W > COMMON, replace the sequence by a synthetic high weight equal 
126 | %%                to (MAXBOTTOM - m).
127 | syn_weight_type(Tail, Common) ->
128 |     case Tail of
129 |         [Next|_] when Next > Common -> high; % W > COMMON
130 |         _ -> low end.                        % W =< COMMON, or no W at all
131 | 
132 | 
133 | %% Count how many more W's lead Tail: returns {Cnt + that, RestOfTail}.
134 | do_common(W, Tail, Cnt) ->
135 |     case Tail of
136 |         [W|Rest] -> do_common(W, Rest, Cnt + 1);
137 |         _        -> {Cnt, Tail} end.
138 | %% {CommonWeight, Max} for level 2, 3 or 4; no clause for other levels.
139 | get_common_value(2, Max) -> {32, Max};
140 | get_common_value(3, Max) -> {2, Max};
141 | %% Level 4 uses Max + 1 as the common weight.
142 | get_common_value(4, Max) ->
143 |     Common = Max + 1,
144 |     {Common, Max}.
145 | %get_common_value(_L = 4) -> 16#FFFF.
146 | 
147 | %% 16#FF below 240, 16#FFFF below 65530, otherwise 16#FFFFFF.
148 | get_new_max(X) ->
149 |     if X < 240   -> 16#FF;
150 |        X < 65530 -> 16#FFFF;
151 |        true      -> 16#FFFFFF end.
152 | -ifdef(TEST).
153 | -include_lib("eunit/include/eunit.hrl").
154 | 
155 | %% EUnit generator for do_seq/3.
156 | do_seq_test_() ->
157 |     F = fun do_seq/3,
158 |     [?_assertEqual({0, [10,10]}, F(10, 20, [])) % expected value goes first
159 |     ].
160 | 
161 | %% [0,0,0,4189], [0,0,0,4189,606]
162 | %% Three-way comparison in term order: '<', '=' (exactly equal) or '>'.
163 | cmp(X, Y) ->
164 |     if X < Y -> '<'; X =:= Y -> '=';
165 |        true -> '>' end.
166 | %% Titled EUnit test: passes when cmp(X, Y) is '<'; the title shows X and Y.
167 | -define(_assertLower(X, Y), 
168 |         {unicode:characters_to_list(io_lib:format("Is ~w < ~w?", [X, Y])), 
169 |          ?_assertEqual([X, '<', Y], [X, cmp(X, Y), Y])}). 
170 | 
171 | %% Wrap Fn: the result of Fn(Val) is fed back as Fn({to_binary, Result}).
172 | binarize(Fn) ->
173 |     fun(Val) ->
174 |         Reassigned = Fn(Val),
175 |         Fn({to_binary, Reassigned}) end.
176 | %% Regression checks: the binary keys must compare in the given order.
177 | lvl4_test_() ->
178 |     Key2 = binarize(reassign_fun(2, 0, 221)),
179 |     Key4 = binarize(reassign_fun(4, 0, 65501)),
180 |     [?_assertLower(Key2([97,124]),          Key2([124])) % DATA1
181 |     ,?_assertLower(Key4([4189]),            Key4([4189, 606]))
182 |     ,?_assertLower(Key4([65500,644,65500]), Key4([65500,65500]))
183 |     ,?_assertLower(Key4([65501,644,65501]), Key4([65501,65501]))
184 |     ].
185 | 
186 | 
187 | %% DATA1:
188 | %% Error (key): [8427,820] greater [820,1425]
189 | %%  Key1: <<0,0,158,131,0,224>>
190 | %%  Key2: <<0,0,158,0,225>>   
191 | %%  Arr1: [[non_variable,0,97,2,8427],[non_variable,0,124,2,820]]
192 | %%  Arr2: [[non_variable,0,124,2,820],[non_variable,0,0,0,1425]]
193 | %% Error in the compression algorithm.
194 | %%  Unzip Key1: [0,97,124,0,2,2]
195 | %%  Unzip Key2: [0,124,0,2]   
196 | %% sort_key and compare returns different results.
197 | %%  Data1: 20EB 0334
198 | %%  Data2: 0334 0591
199 | %%
200 | %%  Result (it is from eunit's output):
201 | %%  ux_uca_compress:162: lvl4_test_ (Is <<131,158>> < <<158>>?)...[0.001 s] ok
202 | 
203 |  
204 | %% ux_uca_compress:reassign_fun: Level 2.
205 | %%    COMMON   is 32.
206 | %%    MIN      is 0.
207 | %%    MAX      is 221.
208 | %%    BOUND    is 82.
209 | %%    GAP_SIZE is 33.
210 | %%    TOP_SIZE is 189.
211 | %%    BOT_SIZE is 32.
212 | %%    MAX_BOT  is 66.
213 | %%    MIN_TOP  is 33.
214 | %%    OLD_MAX  is 221.
215 | %% 
216 | %% =INFO REPORT==== 19-Jun-2012::13:41:39 ===
217 | %% ux_uca_compress:reassign_fun: Level 3.
218 | %%    COMMON   is 2.
219 | %%    MIN      is 0.
220 | %%    MAX      is 31.
221 | %%    BOUND    is 337.
222 | %%    GAP_SIZE is 223.
223 | %%    TOP_SIZE is 29.
224 | %%    BOT_SIZE is 2.
225 | %%    MAX_BOT  is 226.
226 | %%    MIN_TOP  is 3.
227 | %%    OLD_MAX  is 31.
228 | %% 
229 | %% =INFO REPORT==== 19-Jun-2012::13:41:39 ===
230 | %% ux_uca_compress:reassign_fun: Level 4.
231 | %%    COMMON   is 65502.
232 | %%    MIN      is 0.
233 | %%    MAX      is 65502.
234 | %%    BOUND    is 65551.
235 | %%    GAP_SIZE is 32.
236 | %%    TOP_SIZE is 0.
237 | %%    BOT_SIZE is 65502.
238 | %%    MAX_BOT  is 65535.
239 | %%    MIN_TOP  is 65503.
240 | %%    OLD_MAX  is 65501.
241 | 
242 | -endif.
243 | 


--------------------------------------------------------------------------------
/src/uca/ux_uca_decomp.erl:
--------------------------------------------------------------------------------
 1 | -module(ux_uca_decomp).
 2 | -export([compute/2]).
 3 | 
 4 | -import(ux_uca_utils, [
 5 |     get_ducet/0]).
 6 | 
 7 | -include("ux.hrl").
 8 | -include("ux_uca.hrl").
 9 | %% Build weights for the code points in List, using Char's tertiary weight.
10 | compute(Char, List) ->
11 |     Ducet = get_ducet(),
12 |     Tertiary = ux_unidata:tertiary_weight(Char),
13 |     %% 16#001F is handed to cycle/6 as MAX, the L3 for positions past two.
14 |     cycle(1, List, Ducet, Tertiary, 16#001F, []).
15 |     
16 | 
17 | %% Step through the code points, numbering positions from Pos.
18 | %% Set the first two L3 values to be lookup (L3), where the lookup function
19 | %% uses the table in Section 7.2, Tertiary Weight Table. Set the remaining
20 | %% L3 values to MAX (which in the default table is 001F).
21 | %% TODO: what to do when a weight is from more than 1 element? :)
22 | cycle(_Pos, [], _Ducet, _Value, _Max, Acc) ->
23 |     ux_unidata_parser_allkeys:el_to_bin(lists:reverse(Acc));
24 | cycle(Pos, [Point|Points], Ducet, Value, Max, Acc) ->
25 |     L3 = case Pos > 2 of
26 |              true  -> Max;
27 |              false -> Value
28 |          end,
29 |     %% Point is a code point; a result of `other' is rejected with a throw.
30 |     case Ducet([Point]) of
31 |         other  -> erlang:throw({bad_char, Point});
32 |         Weight -> cycle(Pos + 1, Points, Ducet, Value, Max,
33 |                         fill_l3(Weight, L3, Acc))
34 |     end.
35 | 
36 | %% Prepend to Acc every element of the weight with its L3 set to NewL3.
37 | fill_l3([H|T], NewL3, Acc) ->
38 |     [Var, L1, L2, _L3, L4] = H,
39 |     NewH = [Var, L1, L2, NewL3, L4],
40 |     fill_l3(T, NewL3, [NewH|Acc]); % cycle/6 reverses Acc at the end
41 | fill_l3([], _NewL3, Acc) -> Acc.
42 | 


--------------------------------------------------------------------------------
/src/uca/ux_uca_options.erl:
--------------------------------------------------------------------------------
 1 | %%% @doc This library contains functions for building and modifying
 2 | %%%      a sorting configuration.
 3 | %%%      You can use it as:
 4 | %%%      `C = ux_uca_options:get_options(shifted).'
 5 | %%%      And then:
 6 | %%%      `ux_uca:sort(C, ["string1", "string2", "string3"]).'
 7 | %%% @end
 8 | 
 9 | -module(ux_uca_options).
10 | -export([get_options/0, get_options/1, get_options/2]).
11 | -include("ux_uca.hrl").
12 | %% Options record with every field left at the record's defaults.
13 | get_options() -> #uca_options{}.
14 | %% Build options from a preset name or from a non-empty option list.
15 | %% Presets (natural_sort is always false):
16 | %%   non_ignorable, blanked -> strength 3
17 | %%   shifted, shift_trimmed -> strength 4
18 | %% A list such as [{strength, 2}] is applied on top of get_options/0
19 | %% by get_options/2.
20 | get_options(non_ignorable) ->
21 |     preset(non_ignorable, 3);
22 | get_options(blanked) ->
23 |     preset(blanked, 3);
24 | get_options(shifted) ->
25 |     preset(shifted, 4);
26 | get_options(shift_trimmed) ->
27 |     preset(shift_trimmed, 4);
28 | get_options([_|_] = Params) ->
29 |     get_options(get_options(), Params).
30 | 
31 | %% Options of a preset: the alternate mode and the strength are given,
32 | %% natural sorting is switched off.
33 | preset(Alternate, Strength) ->
34 |     #uca_options{
35 |         natural_sort = false,
36 |         strength = Strength,
37 |         alternate = Alternate
38 |     }.
39 | 
40 | 
41 | 
42 | 
43 | %% @doc If you want to use this library without including *.hrl, you can
44 | %% create a #uca_options{} record with this function.
45 | %%
46 | %% The {Name, Value} pairs are applied from left to right to the record
47 | %% given as the first argument, and the updated record is returned.
48 | %% Handled names: hangul_terminator, natural_sort, backwards,
49 | %% case_sensitive, case_first, strength, alternate, sort_key_format.
50 | %% Any other list element matches no clause, so the call fails with
51 | %% function_clause.
52 | %% @end
53 | get_options(Opts=#uca_options{}, []) ->
54 |     Opts;
55 | 
56 | get_options(Opts=#uca_options{},
57 |             [{hangul_terminator, Val}|T]) ->
58 |     get_options(Opts#uca_options{hangul_terminator = Val}, T);
59 | 
60 | get_options(Opts=#uca_options{},
61 |             [{natural_sort, Val}|T]) ->
62 |     get_options(Opts#uca_options{natural_sort = Val}, T);
63 | 
64 | get_options(Opts=#uca_options{},
65 |             [{backwards, Val}|T]) ->
66 |     get_options(Opts#uca_options{backwards = Val}, T);
67 | 
68 | get_options(Opts=#uca_options{},
69 |             [{case_sensitive, Val}|T]) ->
70 |     get_options(Opts#uca_options{case_sensitive = Val}, T);
71 | 
72 | get_options(Opts=#uca_options{},
73 |             [{case_first, Val}|T]) ->
74 |     get_options(Opts#uca_options{case_first = Val}, T);
75 | 
76 | get_options(Opts=#uca_options{},
77 |             [{strength, Val}|T]) ->
78 |     get_options(Opts#uca_options{strength = Val}, T);
79 | 
80 | get_options(Opts=#uca_options{},
81 |             [{alternate, Val}|T]) ->
82 |     get_options(Opts#uca_options{alternate = Val}, T);
83 | 
84 | get_options(Opts=#uca_options{},
85 |             [{sort_key_format, Val}|T]) ->
86 |     get_options(Opts#uca_options{sort_key_format = Val}, T).
87 | 
88 | 
89 | 


--------------------------------------------------------------------------------
/src/uca/ux_uca_sort_key_binary.erl:
--------------------------------------------------------------------------------
 1 | -module(ux_uca_sort_key_binary).
 2 | -export([sort_key/2]).
 3 | -import(ux_uca, [sort_array/2]).
 4 | 
 5 | -import(ux_uca_utils, [
 6 |     do_alt/2, 
 7 |     get_ducet/0, 
 8 |     get_options/0, 
 9 |     split_levels/3, 
10 |     get_reassign_function/2]).
11 | 
12 | -include("ux.hrl").
13 | -include("ux_uca.hrl").
14 | %% Binary sort key of the string S under the options C.
15 | sort_key(#uca_options{strength=Strength, backwards=Backwards} = Opts, S) ->
16 |     Weights = sort_array(Opts, S),
17 |     Ducet = get_ducet(),
18 |     AltFn = ux_uca_alt:get_alternate_function(Opts, Ducet),
19 |     %% The last two arguments are the initial accumulators:
20 |     %% the remaining weights and the key, both empty.
21 |     do_sort_key1(Strength, Backwards, Weights, Ducet, AltFn, [], []).
22 | 
23 | %% @param S::integer()  Strength (Max level)
24 | %% @param B             Backwards flag, handed on to do_sort_key2/6
25 | %% @param W::[binary()] Weights
26 | %% @param D::fun()      Ducet
27 | %% @param A::fun()      Alternate function
28 | %% @param R::[[int()]]  Remain weights
29 | %% @param K::[int()]    Result key
30 | do_sort_key1(S, B, [], D, _A, R, K) when S > 1 ->
31 |     %% Level 1 is complete: close it with 0 and go on with level 2.
32 |     Level1 = level1_to_binary(D, [0|K]),
33 |     do_sort_key2(S, B, 2, lists:reverse(R), D, Level1);
34 | do_sort_key1(_S, _B, [], D, _A, _R, K) ->
35 |     level1_to_binary(D, K); % Return result
36 | do_sort_key1(S, B, [WH|WT], D, A, R, K) ->
37 |     {NewA, Ints} = do_alt(A, WH),
38 |     case Ints of
39 |         [0|Rem] ->
40 |             do_sort_key1(S, B, WT, D, NewA, [Rem|R], K);
41 |         [L1|Rem] ->
42 |             do_sort_key1(S, B, WT, D, NewA, [Rem|R], [L1|K]);
43 |         _ ->
44 |             do_sort_key1(S, B, WT, D, NewA, R, K)
45 |     end.
46 | 
47 | %% Reverse the accumulated level 1 weights and pack them with the
48 | %% `to_binary' request of the level 1 reassign function.
49 | level1_to_binary(D, RevKey) ->
50 |     Key = lists:reverse(RevKey),
51 |     Fn = get_reassign_function(D, 1),
52 |     Fn({to_binary, Key}).
53 | 
54 | 
55 | %% Append level L (and, recursively, the following levels) to the key.
56 | %% S: strength, B: backwards flag, L: current level,
57 | %% W: weights from level L on, D: ducet, K: binary key so far.
58 | do_sort_key2(S, B, L, W, D, K) ->
59 |     {LvlW, RemW} = split_levels(L, B, W),
60 |     Fn = get_reassign_function(D, L),
61 |     BinW = Fn({to_binary, Fn(LvlW)}),
62 |     KeyL = <<K/binary, BinW/binary>>,
63 |     case RemW of
64 |         _ when RemW =:= []; S =< L ->
65 |             %% No weights are left, or the strength is reached.
66 |             KeyL;
67 |         [_|_] ->
68 |             %% Add a delimiter: the binary form of [0] on this level.
69 |             Delim = Fn({to_binary, [0]}),
70 |             do_sort_key2(S, B, L + 1, RemW, D,
71 |                          <<KeyL/binary, Delim/binary>>)
72 |     end.
73 | 
74 | 
75 | 


--------------------------------------------------------------------------------
/src/uca/ux_uca_sort_key_binary_cs.erl:
--------------------------------------------------------------------------------
 1 | %%% This module is a variant of ux_uca_sort_key_binary 
 2 | %%% for case sensative collation.
 3 | -module(ux_uca_sort_key_binary_cs).
 4 | -export([sort_key/2]).
 5 | -import(ux_uca, [sort_array/2]).
 6 | -import(ux_uca_utils, [
 7 |     do_alt/2, 
 8 |     get_ducet/0, 
 9 |     get_options/0, 
10 |     split_levels/3, 
11 |     get_reassign_function/2]).
12 | 
13 | -include("ux.hrl").
14 | -include("ux_uca.hrl").
15 | %% Binary sort key of the string S under the options C (this module is
16 | %% the case sensitive variant).
17 | sort_key(C=#uca_options{}, S) ->
18 |     #uca_options{strength=MaxLvl, backwards=B} = C,
19 |     W = sort_array(C, S),
20 |     D = get_ducet(),
21 |     A = ux_uca_alt:get_alternate_function(C, D),
22 |     do_sort_key1(MaxLvl, B, W, D, A, [], []). % no remains, empty key
23 | 
24 | %% @param S::integer()  Strength (Max level)
25 | %% @param B::boolean()  Backward flag
26 | %% @param W::[binary()] Weights
27 | %% @param D::fun()      Ducet
28 | %% @param A::fun()      Alternate function
29 | %% @param R::[[int()]]  Remain weights
30 | %% @param K::[int()]    Result key
31 | do_sort_key1(S, B, [WH|WT], D, A, R, K) ->
32 |     {NewA, Ints} = do_alt(A, WH),
33 |     case Ints of
34 |     [0|Rem] ->
35 |         do_sort_key1(S, B, WT, D, NewA, [Rem|R], K);
36 |     [L1|Rem] ->
37 |         do_sort_key1(S, B, WT, D, NewA, [Rem|R], [L1|K]);
38 |     _ ->
39 |         do_sort_key1(S, B, WT, D, NewA, R, K)
40 |     end;
41 | do_sort_key1(S, B, [], D, _A, R, K)
42 |     when (S > 1) ->
43 |     W = lists:reverse(R),
44 |     L = 2, % Level
45 |     NewK = [0|K], % 0 closes the first level
46 |     RevK = lists:reverse(NewK),
47 |     Fn = get_reassign_function(D, 3),
48 |     BinK = Fn({to_binary, RevK}),
49 |     do_sort_key2(S, B, L, W, D, BinK);
50 | %% Every remaining strength ends after the first level. This clause must
51 | %% accept any S: matching S = 0 only leaves strength 1 without a clause.
52 | do_sort_key1(_S, _B, [], D, _A, _R, K) ->
53 |     Fn = get_reassign_function(D, 3),
54 |     Fn({to_binary, lists:reverse(K)}). % Return result
55 | 
56 | %% Append level L (and the following levels) to the binary key K.
57 | %% S: strength, B: backwards flag, L: level, W: weights from level L on,
58 | %% D: ducet.
59 | do_sort_key2(S, B, L, W, D, K) ->
60 |     {LvlW, RemW} = split_levels(L, B, W),
61 | 
62 |     %% The weights are reassigned with the function of level L itself.
63 |     RaFn = get_reassign_function(D, L),
64 |     ReassignW = RaFn(LvlW),
65 | 
66 |     %% We swap values on L1 and L3, so `to_binary' of L1 is used on L3.
67 |     BinLvl = if L =:= 3 -> 1; true -> L end,
68 |     BinFn = get_reassign_function(D, BinLvl),
69 |     Pack = fun(Ws) -> BinFn({to_binary, lists:reverse(Ws)}) end,
70 | 
71 |     case RemW of
72 |         _ when RemW =:= []; S =< L ->
73 |             BinW = Pack(ReassignW),
74 |             <<K/binary, BinW/binary>>;
75 |         [_|_] ->
76 |             %% Add a delimiter: 0 is prepended before the reversal.
77 |             BinW = Pack([0|ReassignW]),
78 |             do_sort_key2(S, B, L + 1, RemW, D, <<K/binary, BinW/binary>>)
79 |     end.
80 | 
81 | 
82 | 


--------------------------------------------------------------------------------
/src/uca/ux_uca_sort_key_list.erl:
--------------------------------------------------------------------------------
 1 | -module(ux_uca_sort_key_list).
 2 | -export([sort_key/2]).
 3 | -import(ux_uca, [sort_array/2]).
 4 | 
 5 | -import(ux_uca_utils, [
 6 |     do_alt/2, 
 7 |     get_ducet/0, 
 8 |     get_options/0, 
 9 |     split_levels/3, 
10 |     get_reassign_function/2]).
11 | 
12 | -include("ux.hrl").
13 | -include("ux_uca.hrl").
14 | %% Sort key of the string S as a list, built under the options C.
15 | sort_key(#uca_options{strength=MaxLvl, backwards=B} = C, S) ->
16 |     W = sort_array(C, S),
17 |     D = get_ducet(),
18 |     A = ux_uca_alt:get_alternate_function(C, D),
19 |     %% do_sort_key1/7 returns the key back to front, hence the reverse.
20 |     RevKey = do_sort_key1(MaxLvl, B, W, D, A, [], []),
21 |     lists:reverse(RevKey).
22 | 
23 | %% @param S::integer()  Strength (Max level)
24 | %% @param B             Backwards flag, handed on to do_sort_key2/6
25 | %% @param W::[binary()] Weights
26 | %% @param D::fun()      Ducet
27 | %% @param A::fun()      Alternate function
28 | %% @param R::[[int()]]  Remain weights
29 | %% @param K::[int()]    Result key (reversed)
30 | do_sort_key1(S, B, [], D, _A, R, K) when S > 1 ->
31 |     %% Level 1 is complete: add the 0 delimiter and go on with level 2.
32 |     do_sort_key2(S, B, 2, lists:reverse(R), D, [0|K]);
33 | do_sort_key1(_S, _B, [], _D, _A, _R, K) ->
34 |     K; % Return result
35 | do_sort_key1(S, B, [WH|WT], D, A, R, K) ->
36 |     {NewA, Ints} = do_alt(A, WH),
37 |     %% A leading 0 stays out of the key; when there are no weights at
38 |     %% all, both accumulators are left as they are.
39 |     {NewR, NewK} =
40 |         case Ints of
41 |             [0|Rem]  -> {[Rem|R], K};
42 |             [L1|Rem] -> {[Rem|R], [L1|K]};
43 |             _        -> {R, K}
44 |         end,
45 |     do_sort_key1(S, B, WT, D, NewA, NewR, NewK).
46 | 
47 | 
48 | %% Append level L (and the following levels) to the reversed key K.
49 | %% S: strength, B: backwards flag, W: weights from level L on, D: ducet.
50 | do_sort_key2(S, B, L, W, D, K) ->
51 |     {LvlW, NewW} = split_levels(L, B, W),
52 |     Fn = get_reassign_function(D, L),
53 |     %% K is kept reversed, so the reassigned weights are pushed reversed.
54 |     NewK = lists:reverse(Fn(LvlW), K),
55 |     case NewW of
56 |         _ when NewW =:= []; S =< L ->
57 |             NewK;
58 |         [_|_] ->
59 |             do_sort_key2(S, B, L + 1, NewW, D, [0|NewK]) % 0 is a delimiter
60 |     end.
61 | 
62 | 


--------------------------------------------------------------------------------
/src/uca/ux_uca_sort_key_uncompressed.erl:
--------------------------------------------------------------------------------
 1 | -module(ux_uca_sort_key_uncompressed).
 2 | -export([sort_key/2]).
 3 | -import(ux_uca, [sort_array/2]).
 4 | 
 5 | -import(ux_uca_utils, [
 6 |     do_alt/2, 
 7 |     get_ducet/0, 
 8 |     get_options/0, 
 9 |     split_levels/3, 
10 |     get_reassign_function/2]).
11 | 
12 | -include("ux.hrl").
13 | -include("ux_uca.hrl").
14 | %% Sort key list of the string S under the options C.
15 | sort_key(Opts=#uca_options{strength=Strength, backwards=Backwards}, S) ->
16 |     Weights = sort_array(Opts, S),
17 |     %% Here the ducet is only used to create the alternate function.
18 |     AltFn = ux_uca_alt:get_alternate_function(Opts, get_ducet()),
19 |     RevKey = do_sort_key1(Strength, Backwards, Weights, AltFn, [], []),
20 |     lists:reverse(RevKey).
21 | 
22 | 
23 | %% @param S::integer()  Strength (Max level)
24 | %% @param B             Backwards flag, handed on to do_sort_key2/5
25 | %% @param W::[binary()] Weights
26 | %% @param A::fun()      Alternate function
27 | %% @param R::[[int()]]  Remain weights
28 | %% @param K::[int()]    Result key (reversed)
29 | do_sort_key1(S, B, [WH|WT], A, R, K) ->
30 |     {NewA, Ints} = do_alt(A, WH),
31 |     %% A leading 0 stays out of the key; when there are no weights at
32 |     %% all, both accumulators are left as they are.
33 |     {NewR, NewK} =
34 |         case Ints of
35 |             [0|Rem]  -> {[Rem|R], K};
36 |             [L1|Rem] -> {[Rem|R], [L1|K]};
37 |             _        -> {R, K}
38 |         end,
39 |     do_sort_key1(S, B, WT, NewA, NewR, NewK);
40 | do_sort_key1(S, B, [], _A, R, K) when S > 1 ->
41 |     %% Level 1 is complete: add the 0 delimiter and go on with level 2.
42 |     do_sort_key2(S, B, 2, lists:reverse(R), [0|K]);
43 | do_sort_key1(_S, _B, [], _A, _R, K) ->
44 |     K. % Return result
45 | 
46 | 
47 | 
48 | %% Append level L (and the following levels) to the reversed key K.
49 | %% S: strength, B: backwards flag, W: weights from level L on.
50 | do_sort_key2(S, B, L, W, K) ->
51 |     {LvlW, NewW} = split_levels(L, B, W),
52 |     NewK = lists:reverse(LvlW, K),
53 |     case NewW of
54 |         _ when NewW =:= []; S =< L ->
55 |             NewK;
56 |         [_|_] ->
57 |             do_sort_key2(S, B, L + 1, NewW, [0|NewK]) % 0 is a delimiter
58 |     end.
59 | 
60 | 


--------------------------------------------------------------------------------
/src/uca/ux_uca_testdata.erl:
--------------------------------------------------------------------------------
 1 | %%% There are helpers for reading testdata.
 2 | %%% Used in ux_uca_tests and for generating BEAM module for ucol.
 3 | -module(ux_uca_testdata).
 4 | -export([read_line/2, read_shifted/0]).
 5 | %% Code point lists of all data lines of `collation_test_shifted'.
 6 | %% NOTE(review): the opened file is never closed here - confirm intent.
 7 | read_shifted() ->
 8 |     Opened = ux_unidata:open_test_file('collation_test_shifted'),
 9 |     read_shifted_(skip(Opened), 0, []).
10 | 
11 | %% Collect the code points of every line until eof; lines without code
12 | %% points are dropped. Num is the line counter kept by read_line/2.
13 | read_shifted_(Fd, Num, Acc) ->
14 |     case read_line(Fd, Num) of
15 |         eof -> lists:reverse(Acc);
16 |         {_Raw, [], NewNum} -> read_shifted_(Fd, NewNum, Acc);
17 |         {_Raw, Points, NewNum} ->
18 |             read_shifted_(Fd, NewNum, [Points|Acc])
19 |     end.
20 | 
21 | %% Drop lines up to and including the first empty one ("\n"); returns Fd.
22 | skip(Fd) ->
23 |     case io:get_line(Fd, "") of
24 |         "\n" -> Fd;                         % the empty line is reached
25 |         Line when is_list(Line) -> skip(Fd) % any other line: keep going
26 |     end.
27 | 
28 | %% @doc Read a line from the testdata file Fd (see CollationTest.html).
29 | %% Returns {Plain string, List of code points, StrNum} or eof.
30 | %% Lines starting with "#" and lines whose parsing raises an error are
31 | %% skipped; every line read adds 1 to StrNum.
32 | %% Used by test/4.
33 | %% @end
34 | read_line(Fd, StrNum) ->
35 |     case io:get_line(Fd, "") of
36 |     eof -> eof;
37 |     {error,Mess} -> throw({error, "Error while reading file", Mess});
38 |     "#" ++ _Comment ->
39 |         read_line(Fd, StrNum + 1);
40 |     Data ->
41 |         try parse_points(Data) of
42 |             Points -> {Data, Points, StrNum + 1} % {FullStr, Codepoints}
43 |         catch
44 |         error:_Reason -> read_line(Fd, StrNum + 1)
45 |         end
46 |     end.
47 | 
48 | %% Converts "0009 0021" (the text before "#", ";" or "\n") to
49 | %% [16#0009, 16#0021]; `false' results of hex_to_int/1 are deleted.
50 | parse_points(Data) ->
51 |     [Value|_] = ux_string:split(["#", ";", "\n"], Data),
52 |     Parsed = [ux_unidata_parser:hex_to_int(Token)
53 |               || Token <- string:tokens(Value, " ")],
54 |     [X || X <- Parsed, X =/= false].
55 | 
56 | 


--------------------------------------------------------------------------------
/src/uca/ux_uca_utils.erl:
--------------------------------------------------------------------------------
 1 | %% Contains private common functions.
 2 | -module(ux_uca_utils).
 3 | -export([
 4 |     do_alt/2, 
 5 |     do_alt/3, 
 6 |     do_extract/3, 
 7 |     get_ducet/0, 
 8 |     get_options/0, 
 9 |     split_levels/3, 
10 |     get_reassign_function/2]).
11 | 
12 | %% For debugging only
13 | -export([hangul_type/1,
14 |          implicit_type/1]).
15 | 
16 | -include("ux.hrl").
17 | -include("ux_uca.hrl").
18 | 
19 | %% Classify X with the ?IS_L1_OF_HANGUL_* guard macros: l, v, t, else x.
20 | hangul_type(X) when ?IS_L1_OF_HANGUL_L(X) -> l;
21 | hangul_type(X) when ?IS_L1_OF_HANGUL_V(X) -> v;
22 | hangul_type(X) when ?IS_L1_OF_HANGUL_T(X) -> t;
23 | hangul_type(_) -> x.
24 | 
25 | %% Classify X as base1, base2 or base3 with the ?CHAR_IS_* guard macros.
26 | implicit_type(X) when ?CHAR_IS_UNIFIED_IDEOGRAPH(X) ->
27 |     if (?CHAR_IS_CJK_COMPATIBILITY_IDEOGRAPH(X)
28 |         or ?CHAR_IS_CJK_UNIFIED_IDEOGRAPH(X)) ->
29 |         base1;
30 |      true ->
31 |         base2
32 |     end;
33 | implicit_type(_) ->
34 |     base3.
35 | 
36 | %%
37 | %% Helpers
38 | %%     
39 | %% Run the alternate function A on the weight W: {NextAlternateFun, Weights}.
40 | -spec do_alt(fun(), binary()|integer()) -> {fun(), [integer()]}.
41 | do_alt(A, W) -> A(W).
42 | %% Like do_alt/2, but keeps at most the first S weights.
43 | do_alt(A, W, S) ->
44 |     {NewA, AltW} = A(W),
45 |     {NewA, lists:sublist(AltW, S)}.
46 | %% The ducet fun, requested from ux_unidata with the `skip_check' argument.
47 | -spec get_ducet() -> fun().
48 | get_ducet() -> ux_unidata:ducet(skip_check).
49 | %% NOTE(review): the spec says integer(), the comment below says a list.
50 | -spec do_extract(#uca_options{}, string(), fun()) -> 
51 |         {integer(), string()}.
52 | %% Extract a weight from the string S (C: options, D: ducet).
53 | %% Weights is [<<L1, L2, L3, L4>>, <<L1, L2, L3, L4>>].
54 | do_extract(C, S, D) ->
55 |     {_Weights, _NewS} = ux_uca_extract:extract(C, D, S).
56 | %% Options record with every field left at the record's defaults.
57 | -spec get_options() -> #uca_options{}.
58 | get_options() -> #uca_options{}.
59 |     
60 | 
61 | %%
62 | %% Sort Key Functions
63 | %%
64 | 
65 | %% Level weights come back reversed when L is 2 and B (backwards) is true.
66 | -spec split_levels(integer(), boolean(), [[integer()]]) ->
67 |         {[integer()], [[integer()]]}.
68 | split_levels(L, B, W) ->
69 |     {RevRes, RevRem} = do_split_levels(W, [], []),
70 |     Rem = lists:reverse(RevRem),
71 |     case {L, B} of
72 |         {2, true} -> {RevRes, Rem};
73 |         _         -> {lists:reverse(RevRes), Rem}
74 |     end.
75 | %% Push non-zero heads on Res and non-empty tails on Rem (both reversed).
76 | do_split_levels(Ws, Res, Rem) ->
77 |     Step = fun(WH, {Rs, Ts}) ->
78 |                case WH of
79 |                    [0]   -> {Rs, Ts};
80 |                    [0|T] -> {Rs, [T|Ts]};
81 |                    [H]   -> {[H|Rs], Ts};
82 |                    [H|T] -> {[H|Rs], [T|Ts]}
83 |                end end,
84 |     lists:foldl(Step, {Res, Rem}, Ws).
85 | %% Ask the ducet fun D for the reassign function of level L.
86 | get_reassign_function(D, L) ->
87 |     Request = {reassign_function, L},
88 |     D(Request).


--------------------------------------------------------------------------------
/src/unidata/ux.hrl:
--------------------------------------------------------------------------------
1 | -include("../ux.hrl").
2 | 


--------------------------------------------------------------------------------
/src/unidata/ux_unidata_filelist.erl:
--------------------------------------------------------------------------------
  1 | %%% @doc Key-value store for the list of the servers which 
  2 | %%%      serve the unidata files.
  3 | %%% @private
  4 | -module(ux_unidata_filelist).
  5 | -include("ux.hrl").
  6 | 
  7 | % OTP 
  8 | -export([start_link/0]).
  9 | -export([init/1, terminate/2, handle_call/3, handle_info/2]).
 10 | 
 11 | % Inter-module exports
 12 | -export([reg_pid/2, file_owner/1]).
 13 | 
 14 | % Unidata API
 15 | -export([set_source/4, set_source/2, 
 16 |     get_source/2, get_source/1, get_source_from/2]).
 17 | 
 18 | -behavior(gen_server).
 19 | 
 20 | -record(state, {
 21 |         key2server
 22 | }).
 23 | 
 24 | 
 25 | %% Exported Client Functions
 26 | %% Operation & Maintenance API
 27 | %% Start the server, registered locally under the name of this module.
 28 | start_link() ->
 29 |     %% The last two arguments: the argument of init/1 and the options.
 30 |     gen_server:start_link({local, ?MODULE}, ?MODULE, [], []).
 31 | %% gen_server callback.
 32 | %% The initial state holds an empty dict; handle_call/3 stores the pids
 33 | %% of the registered servers in it, by key.
 34 | init([]) ->
 35 |     Registry = dict:new(),
 36 |     {ok, #state{key2server = Registry}}.
 37 | 
 38 | %% gen_server callback: nothing is cleaned up.
 39 | terminate(_Reason, _State) ->
 40 |     ok.
 41 | 
 42 | 
 43 | %% gen_server callback.
 44 | %% {reg_pid, Key, StorePid}: monitor StorePid (a ux_unidata_store server)
 45 | %% and save it under Key (a combination of filename and fileoptions).
 46 | %% Replies ok, or {error, key_already_registred} when Key is taken.
 47 | handle_call({reg_pid, Key, StorePid}, _From, State = #state{key2server = K2S}) ->
 48 |     erlang:monitor(process, StorePid),
 49 |     ?DBG("~w: Registrate a new process ~w with the key ~w. ~n", 
 50 |         [?MODULE, StorePid, Key]),
 51 |     case dict:is_key(Key, K2S) of
 52 |         false ->
 53 |             {reply, ok, State#state{key2server = dict:store(Key, StorePid, K2S)}};
 54 |         true ->
 55 |             %% Two arguments, so two ~w directives; the line ends with ~n.
 56 |             error_logger:error_msg("~w: The key ~w is already registred. ~n",
 57 |                 [?MODULE, Key]),
 58 |             {reply, {error, key_already_registred}, State}
 59 |     end;
 60 | %% {get_pid, Key}: replies {ok, Pid} or error (the result of dict:find/2).
 61 | handle_call({get_pid, Key}, _From, State = #state{key2server = K2S}) ->
 62 |     {reply, dict:find(Key, K2S), State}.
 63 | 
 64 | 
 65 | 
 66 | 
 67 | % Server is dead, unregister it.
 68 | % Delete pid from the dict. ServerPid is a pid of ux_unidata_store server.
 69 | handle_info({'DOWN', _Ref, process, ServerPid, _Reason},
 70 |             State = #state{key2server = K2S}) ->
 71 |     ?DBG("~w: Delete Pid = ~w from the dictionary. ~n", 
 72 |          [?MODULE, ServerPid]),
 73 |     K2S_2   = dict:filter(fun(_K, V) -> V =/= ServerPid end, K2S),
 74 |     State_2 = State#state{key2server = K2S_2},
 75 |     {noreply, State_2}.
 76 | 
 77 | 
 78 | %%
 79 | %% API
 80 | %%
 81 | -spec set_source(Level::atom(), Parser::atom(), Types::[atom()] | all,
 82 |                  FileName::string()) -> ok.
 83 | 
 84 | %% @doc Set a data source (data file) with UNIDATA.
 85 | %%
 86 | %% 1. Runs server which parses file and returns a list of funs.
 87 | %% 2. Registers returned funs on the process, application or node level.
 88 | %%
 89 | %% Test me:
 90 | %% ux_unidata_filelist:set_source(process, blocks, all, code:priv_dir(ux) ++ "/UNIDATA/Blocks.txt").
 91 | %% ux_unidata_filelist:set_source(process, blocks, [blocks], code:priv_dir(ux) ++ "/UNIDATA/Blocks.txt").
 92 | %% F=ux_unidata_filelist:get_source(blocks, blocks).
 93 | %% {ok,S}=ux_unidata_filelist:file_owner(code:priv_dir(ux) ++ "/UNIDATA/Blocks.txt").
 94 | %% ux_unidata_store:remove_type(S, blocks).
 95 | %%
 96 | %% Returns nothing.
 97 | 
 98 | set_source('node', Parser, Types, FileName) ->
 99 |     Key = {Parser, Types, FileName}, % 'node': hand the key to the server
100 |     ux_unidata_server:set_default(Key);
101 |     %% The clause below serves the 'process' and 'application' levels.
102 | set_source(Level, Parser, Types, FileName) ->
103 |     Key = {Parser, Types, FileName},
104 |     ClientPid = case Level of
105 |         'process' -> 
106 |             self();
107 |         'application' ->
108 |             % We unload data when the application dies.
109 |             {ok, AppName1} = application:get_application(), % let it crash
110 |             get_application_pid(AppName1)
111 |     end,
112 |     Key = {Parser, Types, FileName}, % NOTE(review): repeats the match above
113 |     Funs = lists:map(fun({Type, Ets, Fun}) ->
114 |         %% This function allows to extract data from different data sources:
115 |         %% * ETS tables;
116 |         %% * dynamically compiled modules.
117 |         DataSourceFun = fun
118 |             %% Run check only once.
119 |             %% For fast implementations of filters.
120 |             ('skip_check') ->
121 |                 %% Fun itself if its table has an owner, else a reloaded fun.
122 |                 case ets:info(Ets, 'owner') of
123 |                 undefined -> 
124 |                    set_source(process, Parser, [Type], FileName),
125 |                    NewFun = get_source(Parser, Type);
126 |                 _ -> Fun
127 |                 end;
128 | 
129 |             %% For ux_unidata_server. Check ETS before return value.
130 |             ('test') ->
131 |                 case ets:info(Ets, 'owner') of
132 |                 'undefined' -> 
133 |                     false;
134 |                 _ -> true
135 |                 end;
136 | 
137 |             %% Get an ETS table (need for CLDR).
138 |             ('get_table') ->
139 |                 case ets:info(Ets, 'owner') of
140 |                 'undefined' -> 
141 |                     set_source('node', Parser, [Type], FileName),
142 |                     NewFun = get_source(Parser, Type),
143 |                     NewFun('get_table');
144 |                 _ -> 
145 |                     Ets
146 |                 end;
147 | 
148 |             ('reload') ->
149 |                 case ets:info(Ets, 'owner') of
150 |                 'undefined' -> 
151 |                     set_source('node', Parser, [Type], FileName),
152 |                     NewFun = get_source(Parser, Type),
153 |                     ok;
154 |                 _ -> ok
155 |                 end;
156 | 
157 |             %% Check an ETS table and run function.
158 |             (Val) ->
159 |                 try
160 |                     Fun(Val)
161 |                 catch
162 |                 error:badarg -> 
163 |                     case ets:info(Ets) of % NOTE(review): no clause if the table exists
164 |                     'undefined' -> 
165 |                         set_source(Level, Parser, [Type], FileName),
166 |                         NewFun = get_source(Parser, Type),
167 |                         NewFun(Val) 
168 |                     end
169 |                 end 
170 |             end, %% of DataSourceFun
171 | 
172 |         % Set an upgrade trigger.
173 |         {{Parser, Type}, DataSourceFun}
174 |         
175 |         end, get_funs(Key, ClientPid)),
176 | 
177 |     ?DBG("~w: Loaded funs: ~w. ~n", 
178 |         [?MODULE, Funs]),
179 | 
180 |     case Level of
181 |     'process' -> 
182 |         % Put to the process dictionary.
183 |         set_proc_dict(Funs);
184 |     'application' ->
185 |         {ok, AppName2} = application:get_application(), 
186 |         set_app_env(AppName2, Funs)
187 |     end,
188 |     ok.
189 | 
190 | 
191 | %% Short form: the {Parser, Types, Filename} key is unpacked for set_source/4.
192 | set_source(Level, {Parser, Types, Filename}) ->
193 |     set_source(Level, Parser, Types, Filename).
194 | 
195 | %% @doc Return the registered fun for the {Parser, Type} pair.
196 | %% The lookup order is that of get_source/1: the process dictionary,
197 | %% then the application environment, then the default from the server.
198 | get_source(Parser, Type) ->
199 |     get_source({Parser, Type}).
200 | 
201 | 
202 | 
203 | -spec get_source({Parser::atom(), Type::atom()}) -> fun() | undefined.
204 | 
205 | %% @doc Try to retrieve the data source registered for Value. The first
206 | %% level whose result is not `undefined' wins:
207 | %% Step 1: process dictionary. Step 2: application environment.
208 | %% Step 3: defaults (`node' level), whatever they return.
209 | get_source(Value) ->
210 |     FromProcess = get_source_from(process, Value),
211 |     if FromProcess =/= undefined -> FromProcess;
212 |        true ->
213 |            FromApp = get_source_from(application, Value),
214 |            if FromApp =/= undefined -> FromApp;
215 |               true -> get_source_from('node', Value)
216 |            end
217 |     end.
218 | 
219 | 
220 | %%
221 | %% Inter-module exports
222 | %%
223 | 
224 | %% Look Value up on one level: `process' (process dictionary),
225 | %% `application' (environment of the calling process' application) or
226 | %% `node' (default kept by ux_unidata_server).
227 | get_source_from('process', Value) ->
228 |     erlang:get(Value);
229 | get_source_from('application', Value) ->
230 |     %% application:get_env/1 wraps a found value in {ok, _}; unwrap it so
231 |     %% that this level returns the stored value itself, like `process'.
232 |     case application:get_env(Value) of
233 |         {ok, Fun} -> Fun;
234 |         undefined -> undefined
235 |     end;
236 | get_source_from('node', Value) ->
237 |     %% Call ux:start() first when the server is not registered.
238 |     erlang:whereis(ux_unidata_server) =:= undefined andalso ux:start(),
239 |     ux_unidata_server:get_default(Value).
239 | %% Return the funs for the Types of Key from the store server of Key.
240 | -spec get_funs(Key::{Parser::atom(), Types::[atom()], FileName::string()},
241 |     pid()) -> [{Type::atom(), Ets::integer(), fun()}].
242 | get_funs({_Parser, Types, _FileName} = Key, ClientPid) ->
243 |     ux_unidata_store:get_funs(get_pid(Key, ClientPid), Types).
244 | 
245 | 
%% Store every `{Key, Val}' pair in the environment of application `Name',
%% in list order. Always returns `ok'.
set_app_env(Name, Pairs) ->
    lists:foreach(
        fun({Key, Val}) ->
            ?DBG("~w: Set a application enviroment variable ~w::~w to ~w. ~n", 
                [?MODULE, Name, Key, Val]),
            application:set_env(Name, Key, Val)
        end,
        Pairs).
252 |     
253 |     
%% Put every `{Key, Val}' pair into the process dictionary of the caller,
%% in list order. Always returns `ok'.
set_proc_dict(Pairs) ->
    lists:foreach(
        fun({Key, Val}) ->
            ?DBG("~w: Put the value to the process dictionary: ~w::~w to ~w. ~n", 
                [?MODULE, self(), Key, Val]),
            erlang:put(Key, Val)
        end,
        Pairs).
260 | 
261 | 
%% Look up a running application by name in application:info().
%% Returns the whole `{Name, Pid}' entry of the `running' list (not the
%% bare pid) and fails with badmatch when the application is not running.
get_application_pid(Name) ->
    {'running', Running} = lists:keyfind('running', 1, application:info()),
    {Name, _Pid} = lists:keyfind(Name, 1, Running).
267 | 
268 | 
%% Return the pid of the store process that serves the file named in Key.
%% When an owner is already registered, ClientPid is handed to it for
%% monitoring and the requested types are checked on it. Otherwise the
%% file is read through ux_unidata_store_sup, which yields the new pid.
get_pid(Key, ClientPid) when is_pid(ClientPid) ->
    FileId = key_to_id(Key),
    case file_owner(FileId) of
        % Server is already running.
        {ok, RunningPid} when is_pid(RunningPid) ->
            ux_unidata_store:monitor_client_process(RunningPid, ClientPid),
            ux_unidata_store:check_types(RunningPid, key_to_types(Key)),
            RunningPid;

        % Nobody owns this file yet.
        error ->
            {ok, StartedPid} = ux_unidata_store_sup:read_file(Key, ClientPid),
            StartedPid
    end.
284 | 
%% @private
%% Ask the locally registered server of this module which process serves
%% the given UNIDATA file. Each file has its own owner.
%% get_pid/2 expects the answer to be `{ok, Pid}' or `error'.
file_owner(FileName) ->
    Request = {get_pid, FileName},
    gen_server:call(?MODULE, Request).
290 | 
%% Used only by ux_unidata_store:init/1.
%% Don't use this function from user code.
%% Registers StoreServerPid as the owner of the file identified by Key.
%% Any answer other than `ok' raises badmatch, e.g.
%% {badmatch,{error,key_already_registred}}.
reg_pid(Key, StoreServerPid) when is_pid(StoreServerPid) ->
    Request = {reg_pid, key_to_id(Key), StoreServerPid},
    ok = gen_server:call(?MODULE, Request).
297 | 
298 | %%
299 | %% Private helper functions.
300 | %%
301 | 
%% Build the identifier under which a file owner is registered: the parser
%% and file name of the key plus the environment that
%% ux_unidata_store:get_env/1 reports for it. The types are not part of it.
key_to_id({Parser, _Types, FileName} = Key) ->
    {Parser, FileName, ux_unidata_store:get_env(Key)}.

%% Extract the list of requested types from a key.
key_to_types({_Parser, Types, _FileName}) ->
    Types.
309 | 


--------------------------------------------------------------------------------
/src/unidata/ux_unidata_parser_allkeys.erl:
--------------------------------------------------------------------------------
  1 | %%% Example:
  2 | %%% ux_unidata_filelist:get_pid({allkeys, [ducet], code:priv_dir(ux) ++ "/UNIDATA/allkeys.txt"}).
  3 | %%% @private
  4 | -module(ux_unidata_parser_allkeys).
  5 | -include("ux.hrl").
  6 | %-include("ux_string.hrl").
  7 | 
  8 | 
  9 | -export([parse/1, types/0, get_function/2
 10 |     , after_parse/1 % comment to disable post processing
 11 |     ]).
 12 | 
 13 | %% For ux_uca_decomp 
 14 | -export([el_to_bin/1]).
 15 | 
 16 | 
 17 | types() -> [ducet].
 18 | 
 19 | parse(In) ->
 20 |     case ux_unidata_parser:split($;, In) of
 21 |     [[_|_] = Code, [_|_] = Element] ->
 22 |         InEl = lists:map(fun ux_unidata_parser:hex_to_int/1,
 23 |                         string:tokens(Code, " ")),
 24 |         OutEl = parse_el(ux_unidata_parser:delete_spaces(Element)),
 25 |         %io:format("String: ~ts, From reversed: ~w, To: ~w~n", [In, InEl, OutEl]),
 26 | 
 27 |         Res = case InEl of 
 28 |             [] -> skip; 
 29 |             _ -> {InEl, OutEl} 
 30 |         end,
 31 |         {ok,
 32 |             [{ducet,   Res}
 33 |             ]
 34 |         };
 35 |     _ -> skip
 36 |     end.
 37 | 
 38 | after_parse(Ets) ->
 39 |     do_after(Ets),
 40 |     ok.
 41 | 
 42 | get_function(ducet, Table) -> 
 43 |     % R1 is only for encoding to binary, not reassign.
 44 |     R1 = get_reassign_function(Table, 1),
 45 | 
 46 |     R2 = get_reassign_function(Table, 2),
 47 |     R3 = get_reassign_function(Table, 3),
 48 |     R4 = get_reassign_function(Table, 4),
 49 |     F = ux_unidata_parser:ets_fun(Table, other),
 50 |     LTable = get_val(Table, 'LTable'),
 51 |     MTable = get_val(Table, 'MTable'),
 52 |     MF = fun(Value) ->
 53 |             case ets:member(LTable, Value) of
 54 |             true -> true;
 55 |             false -> 
 56 |                 case ets:member(MTable, Value) of
 57 |                 true -> maybe;
 58 |                 false -> false
 59 |                 end
 60 |             end
 61 |         end,
 62 |     
 63 |     fun(member_function) -> MF;
 64 |        ({reassign_function, 1}) -> R1; % Return fun.
 65 |        ({reassign_function, 2}) -> R2; % Return fun.
 66 |        ({reassign_function, 3}) -> R3;
 67 |        ({reassign_function, 4}) -> R4;
 68 |        ([_|_]=Value) -> 
 69 |         case F(Value) of
 70 |         W when is_binary(W) -> 
 71 |             bin_to_list2(W);
 72 |         Other -> 
 73 |             Other
 74 |         end 
 75 |       end.
 76 | 
 77 | get_reassign_function(Table, Lvl) ->
 78 |     [{_, Min}] = ets:lookup(Table, {min, Lvl}),
 79 |     [{_, Max}] = ets:lookup(Table, {max, Lvl}),
 80 |     ux_uca_compress:reassign_fun(Lvl, Min, Max).
 81 | 
 82 | get_val(Table, Val) ->
 83 |     [{_, Res}] = ets:lookup(Table, Val),
 84 |     Res.
 85 | 
 86 | 
 87 | 
 88 | 
 89 | 
 90 | 
 91 | %%
 92 | %% Hacks.
 93 | %%
 94 | 
 95 | do_after([{ducet, Table} | Tail]) ->
 96 |     do_after_ranges(Table),
 97 |     
 98 |     LTable = ets:new(ducet_lookup, [{write_concurrency, false}, {read_concurrency, true}]),
 99 |     do_after_lookup(Table, LTable),
100 | 
101 |     MTable = ets:new(ducet_more, [{write_concurrency, false}, {read_concurrency, true}]),
102 |     do_after_ducet(Table, MTable),
103 | 
104 |     ets:insert(Table, {'LTable', LTable}),
105 |     ets:insert(Table, {'MTable', MTable}),
106 | 
107 |     do_after(Tail);
108 | do_after([_ | Tail]) ->
109 |     do_after(Tail);
110 | do_after([]) -> ok.
111 | 
112 | 
%% @doc Add a table that contains all keys of the elements.
%% Walks Table with ets:first/next and, for every key that is a list,
%% stores the reversed key in LTable. Keys of other shapes are skipped.
%% The table is not fixed (ets:safe_fixtable/2) during the walk.
do_after_lookup(Table, LTable) ->
    copy_list_keys(Table, LTable, ets:first(Table)).

copy_list_keys(_Table, _LTable, '$end_of_table') ->
    ok;
copy_list_keys(Table, LTable, Key) ->
    case is_list(Key) of
    true -> add_lookup(LTable, Key);
    false -> skipped
    end,
    copy_list_keys(Table, LTable, ets:next(Table, Key)).

%% Insert the reversed key as a 1-tuple.
add_lookup(LTable, Index) ->
    ets:insert(LTable, {lists:reverse(Index)}).
141 | 
142 | 
143 | 
%% @doc Add `more' to the empty space between the elements.
%% Walks Table with ets:first/next. For every key that is a list, each
%% proper non-empty prefix of the key that is not yet a key of Table is
%% inserted into Table as `{Prefix, more}' and, reversed, into MTable.
%% Keys of other shapes are skipped.
%% The table is not fixed (ets:safe_fixtable/2) during the walk.
do_after_ducet(Table, MTable) ->
    mark_from(Table, MTable, ets:first(Table)).

mark_from(_Table, _MTable, '$end_of_table') ->
    ok;
mark_from(Table, MTable, Key) ->
    case is_list(Key) of
    true -> ducet_more(Table, MTable, Key);
    false -> skipped
    end,
    mark_from(Table, MTable, ets:next(Table, Key)).

%% Mark the prefixes of one key, longest prefix first.
ducet_more(Table, MTable, In) ->
    mark_prefixes(Table, MTable, lists:reverse(In)).

%% The third argument is the key in reversed order; dropping its head
%% yields the next shorter prefix (still reversed).
mark_prefixes(_Table, _MTable, []) ->
    ok;
mark_prefixes(_Table, _MTable, [_Single]) ->
    ok;
mark_prefixes(Table, MTable, [_Last | RevPrefix]) ->
    Prefix = lists:reverse(RevPrefix),
    case ets:member(Table, Prefix) of
    true ->
        ok;
    false ->
        ets:insert(Table, {Prefix, more}),
        ets:insert(MTable, {RevPrefix})
    end,
    mark_prefixes(Table, MTable, RevPrefix).
195 |         
196 | 
197 | 
198 | 
%% @doc Add max, min, common values.
%% Walks every entry of Table, decodes its weight binary and keeps a
%% per-level running minimum and maximum over all decoded elements.
%% At the end the results are stored in Table as `{{min, Lvl}, Val}' and
%% `{{max, Lvl}, Val}' for Lvl = 1..4.
%%
%% Entries whose binary is empty contribute nothing. When no element is
%% found at all (for example, an empty table), nothing is stored.
%% Every value in the table must be a binary in the layout produced by
%% el_to_bin/1, so this has to run before any other kind of entry is added.
do_after_ranges(Table) ->
    do_after_ranges_next(Table, ets:first(Table), [], []).

%% Index is the key to process next; Min and Max are `[]' until the first
%% weight element has been seen.
do_after_ranges_next(Table, '$end_of_table', Min, Max) ->
    InsFn = fun(Type) ->
            fun(Val, Lvl) ->
                ets:insert(Table, {{Type, Lvl}, Val}),
                Lvl + 1
            end
        end,
    lists:foldl(InsFn(min), 1, Min),
    lists:foldl(InsFn(max), 1, Max),
    ok;
do_after_ranges_next(Table, Index, Min, Max) ->
    [{_, Val}] = ets:lookup(Table, Index),
    ValList = bin_to_list(Val),
    NewMin = fold_ranges(fun min/2, Min, ValList),
    NewMax = fold_ranges(fun max/2, Max, ValList),
    do_after_ranges_next(Table, ets:next(Table, Index), NewMin, NewMax).

%% Combine the accumulator with every element of the list, level by level.
%% The first element ever seen becomes the initial accumulator.
fold_ranges(_F, Acc, []) ->
    Acc;
fold_ranges(F, [], [First | Rest]) ->
    lists:foldl(zip2fun(F), First, Rest);
fold_ranges(F, Acc, Elements) ->
    lists:foldl(zip2fun(F), Acc, Elements).


%%
%% do_after helpers
%%

%% Lift a binary fun to work element-wise on two lists of equal length.
zip2fun(F) ->
    fun(L1, L2) -> lists:zipwith(F, L1, L2) end.

max(V1, V2) when V1 > V2 -> V1;
max(_V1, V2) -> V2.
min(V1, V2) when V1 < V2 -> V1;
min(_V1, V2) -> V2.

%% Decode a weight binary into a list of [L1, L2, L3, L4] elements.
%% The leading type byte of every element is dropped.
%% bin_to_list(Bin) -> lists:map(fun([H|T]) -> T end, bin_to_list2(Bin)).
bin_to_list(Bin) ->
    do_bin_to_list(Bin, []).

do_bin_to_list(<<_:8, L1:16, L2:8, L3:8, L4:16, Rem/binary>>, Res) ->
    El = [L1, L2, L3, L4],
    do_bin_to_list(Rem, [El|Res]);
do_bin_to_list(<<>>, Res) ->
    lists:reverse(Res).
264 | 
265 | 
%% Decode a weight binary into a list of [Type, L1, L2, L3, L4] elements,
%% where Type is the atom that type_atom/1 gives for the leading byte.
%% Every element occupies 7 bytes: T:8, L1:16, L2:8, L3:8, L4:16.
bin_to_list2(<<T:8, L1:16, L2:8, L3:8, L4:16, Rest/binary>>) ->
    [[type_atom(T), L1, L2, L3, L4] | bin_to_list2(Rest)];
bin_to_list2(<<>>) ->
    [].
274 | 
275 | 
276 | %%
277 | %% Helpers
278 | %%
279 | 
%% Parses a string of collation elements such as
%% "[.0001.0002.0003.0004][*0005.0006.0007.0008]" into one binary.
%% Every element is passed through split_large_weights/2 and the resulting
%% list is encoded with el_to_bin/1.
parse_el(El) ->
    Elements = parse_el(El, [], false, []),
    el_to_bin(lists:reverse(Elements)).

% Digits - reversed characters of the field being read (e.g. [48,48,48,48])
% Fields - reversed fields of the current element, type atom last;
%          `false' while outside of an element
% Done   - finished elements, newest first
parse_el([], _Digits, _Fields, Done) ->
    Done;
parse_el([$[, $. | Rest], _Digits, _Fields, Done) ->
    parse_el(Rest, [], [non_variable], Done); % [.XXXX.XXXX.XXXX.XXXX]
parse_el([$[, $* | Rest], _Digits, _Fields, Done) ->
    parse_el(Rest, [], [variable], Done); % [*XXXX.XXXX.XXXX.XXXX]
parse_el([_Skipped | Rest], Digits, false, Done) ->
    % Outside of an element: ignore the character.
    parse_el(Rest, Digits, false, Done);
parse_el([$] | Rest], Digits, Fields, Done) ->
    Element = lists:reverse(el_res(Digits, Fields)),
    parse_el(Rest, [], false, split_large_weights(Element, Done));
parse_el([$. | Rest], Digits, Fields, Done) ->
    parse_el(Rest, [], el_res(Digits, Fields), Done);
parse_el([Char | Rest], Digits, Fields, Done) ->
    parse_el(Rest, [Char | Digits], Fields, Done).

%% Convert the collected digits with ux_unidata_parser:hex_to_int/1 and
%% push the result onto the fields of the current element.
el_res(Digits, Fields) when is_list(Digits), is_list(Fields) ->
    [ux_unidata_parser:hex_to_int(lists:reverse(Digits)) | Fields].
306 | 
%% Push the element [Type, L1, L2, L3, L4] onto Res, splitting it into
%% several elements when a weight reaches its limit.
%% Limits: 16#FFDD for L1 and L4, 16#DD for L2 and L3 (1F? for L3).
%% A weight below its limit is kept and leaves 0 for the next element.
%% A weight W at or above its limit Max is stored as Max, and W - Max + 1
%% is carried over to the next element of the same type.
%% An element whose four weights are all 0 is dropped.
split_large_weights([_Type, 0, 0, 0, 0], Res) ->
    Res;
split_large_weights([Type, L1, L2, L3, L4], Res) ->
    Limits = [16#FFDD, 16#DD, 16#DD, 16#FFDD],
    Pairs = [clamp_weight(Weight, Limit)
             || {Weight, Limit} <- lists:zip([L1, L2, L3, L4], Limits)],
    {Kept, CarriedOver} = lists:unzip(Pairs),
    split_large_weights([Type | CarriedOver], [[Type | Kept] | Res]).

%% Returns {StoredWeight, CarriedOverWeight}.
clamp_weight(Weight, Limit) when Weight < Limit ->
    {Weight, 0};
clamp_weight(Weight, Limit) ->
    {Limit, Weight - Limit + 1}.
326 | 
327 |     
%% Encode a list of [Type, L1, L2, L3, L4] elements into one binary.
%% Every element takes 7 bytes: T:8, L1:16, L2:8, L3:8, L4:16, where T is
%% the integer code of the type atom.
el_to_bin(List) ->
    lists:foldl(
        fun([Type, L1, L2, L3, L4], Bin) ->
            T = type_int(Type),
            <<Bin/binary, T:8, L1:16, L2:8, L3:8, L4:16>>
        end,
        <<>>,
        List).

%% Integer code of an element type.
type_int(non_variable) -> 0;
type_int(variable)     -> 1.


%% Element type of an integer code (inverse of type_int/1).
type_atom(0) -> non_variable;
type_atom(1) -> variable.
341 | 
342 | 
343 | %%
344 | %% Tests
345 | %%
346 | -ifdef(TEST).
347 | -include_lib("eunit/include/eunit.hrl").
348 | 
%% Checks parse_el/1 on an all-zero element (dropped), on a single
%% element and on a non-variable element followed by a variable one.
parse_el_test_() ->
    Cases =
        [{"[.0000.0000.0000.0000]",
          <<>>}
        ,{"[.0001.0002.0003.0004]",
          <<0:8, 1:16, 2:8, 3:8, 4:16>>}
        ,{"[.0001.0002.0003.0004][*0005.0006.0007.0008]",
          <<0:8, 1:16, 2:8, 3:8, 4:16,   1:8, 5:16, 6:8, 7:8, 8:16>>}
        ],
    [?_assertEqual(parse_el(Input), Expected) || {Input, Expected} <- Cases].
356 | 
357 | 
358 | -endif.
359 | 


--------------------------------------------------------------------------------
/src/unidata/ux_unidata_parser_blocks.erl:
--------------------------------------------------------------------------------
 1 | %%% Blocks.txt
 2 | %%% @private
 3 | -module(ux_unidata_parser_blocks).
 4 | -export([parse/1, types/0, get_function/2]).
 5 | 
 6 | %%% Example:
 7 | %%% ux_unidata_filelist:get_pid({blocks, all, code:priv_dir(ux) ++ "/UNIDATA/Blocks.txt"}).
 8 | -define(TO_INT(C), ux_unidata_parser:hex_to_int(C)).
 9 | 
%% The only data type produced by this parser.
types() ->
    [block].

%% Parse one line of the file.
%% An empty line is skipped. Otherwise the line is split on `;' into a
%% code field and a name field. The first character of the name field is
%% dropped and the rest becomes the block atom.
%% Returns `{ok, [{block, {ParsedKey, Atom}}]}' or `skip'.
parse([]) ->
    skip;
parse(Data) ->
    case ux_unidata_parser:split($;, Data) of
    [Code, [_First | BlockName]] ->
        Atom = list_to_atom(BlockName),
        ParsedKey = parse_code(Code),
        {ok, [{block, {ParsedKey, Atom}}]};
    _Skip ->
        skip
    end.


%% Access fun for the `block' table; `other' is the default value.
get_function(block, Table) ->
    ux_unidata_parser:expand_fun(Table, other).

%%
%% Helpers
%%

%% "From..To" becomes a `{From, To}' pair, a single code a single value;
%% every code is converted with ?TO_INT.
parse_code(Code) ->
    case string:tokens(Code, "..") of
    [From, To] ->
        {?TO_INT(From), ?TO_INT(To)};
    [Single] when Single =:= Code ->
        ?TO_INT(Single)
    end.
42 | 
43 | 


--------------------------------------------------------------------------------
/src/unidata/ux_unidata_parser_comp_exclusions.erl:
--------------------------------------------------------------------------------
 1 | %%% CompositionExclusions.txt
 2 | %%% @private
 3 | -module(ux_unidata_parser_comp_exclusions).
 4 | -export([parse/1, types/0, get_function/2]).
 5 | %%% Example:
%%% ux_unidata_filelist:get_pid({comp_exclusions, [is_exclusion], code:priv_dir(ux) ++ "/UNIDATA/CompositionExclusions.txt"}).
 7 | 
 8 | types() ->
 9 |     [is_exclusion].
10 | 
11 | parse(In) ->
12 |     case ux_unidata_parser:delete_spaces(In) of
13 |     [] -> skip;
14 |     [_|_] = Code -> {ok, 
15 |         [{is_exclusion, {ux_unidata_parser:hex_to_int(Code)}}]
16 |         }
17 |     end.
18 | 
19 | get_function(is_exclusion, Table) -> 
20 |     ux_unidata_parser:bool_fun(Table).
21 | 


--------------------------------------------------------------------------------
/src/unidata/ux_unidata_parser_grapheme_break_property.erl:
--------------------------------------------------------------------------------
%%% GraphemeBreakProperty.txt
 2 | %%% @private
 3 | -module(ux_unidata_parser_grapheme_break_property).
 4 | -export([parse/1, types/0, get_function/2]).
 5 | %       after_parse/1]).
 6 | 
 7 | -define(TO_INT(C), ux_unidata_parser:hex_to_int(C)).
 8 | 
 9 | types() ->
10 |     ['grapheme_break_property'
11 |     ].
12 | 
13 | parse(In) ->
14 |     case In of
15 |     [] -> 'skip';
16 |     Data -> 
17 | 
18 |         case ux_unidata_parser:split($;, Data) of
19 |         [Code, Name] ->
20 |         Atom = list_to_atom(string:strip(Name)),
21 |         ParsedKey = parse_code(string:strip(Code)),
22 |         {ok, 
23 |         [{'grapheme_break_property',  {ParsedKey, Atom}}
24 |         ]};
25 |         _Skip -> 'skip'
26 |         end
27 |     end.
28 | 
29 | 
30 | get_function('grapheme_break_property', Table) ->
31 |     DefValue = 'Any',
32 | %   ux_unidata_parser:expand_fun(Table, DefValue).
33 |     ux_unidata_parser:expand_opt_fun(Table, DefValue).
34 | %   ux_unidata_parser:expand_meta_fun(Table, DefValue).
35 | 
36 | 
37 | 
38 | %after_parse(Ets) ->
39 | %    do_after(Ets),
40 | %    ok.
41 | %
42 | %
43 | %do_after([{_Name, Table} | Tail]) ->
44 | %    ux_unidata_parser:expand_table(Table),
45 | %    do_after(Tail);
46 | %do_after([]) -> ok.
47 | 
48 | %%
49 | %% Helpers
50 | %%
51 | parse_code(Code) -> case string:tokens(Code, "..") of
52 |     [From, To]  -> {?TO_INT(From), ?TO_INT(To)};
53 |     [Code]      -> ?TO_INT(Code)
54 |     end.
55 | 
56 | 


--------------------------------------------------------------------------------
/src/unidata/ux_unidata_parser_norm_props.erl:
--------------------------------------------------------------------------------
 1 | %%% DerivedNormalizationProps.txt
 2 | %%% @private
 3 | -module(ux_unidata_parser_norm_props).
 4 | -export([parse/1, types/0, get_function/2
 5 | %, after_parse/1
 6 | ]).
 7 | 
 8 | %%% Example:
 9 | %%% ux_unidata_filelist:get_pid({norm_props, all, code:priv_dir(ux) ++ "/UNIDATA/DerivedNormalizationProps.txt"}).
10 | -define(TO_INT(C), ux_unidata_parser:hex_to_int(C)).
11 | 
%% Data types produced by this parser, one per quick-check property.
types() ->
    [nfc_qc, nfd_qc, nfkc_qc, nfkd_qc].

%% Parse one line of the file.
%% Spaces are removed first; an empty remainder is skipped. The rest is
%% split on `;'. Only lines with exactly three fields are used, and only
%% when the second field is one of the four *_QC names and the third is
%% "N" or "M".
%% The result lists all four types: the type named on the line gets
%% `{ParsedKey, n | m}', the other three get `skip'.
parse(In) ->
    case ux_unidata_parser:delete_spaces(In) of
    [] ->
        skip;
    Data ->
        parse_fields(ux_unidata_parser:split($;, Data))
    end.

parse_fields([Code, Form, Props])
        when (Form=="NFC_QC" orelse Form=="NFKC_QC" 
       orelse Form=="NFD_QC" orelse Form=="NFKD_QC")
         and (Props=="N" 
%      orelse Props=="Y"  
       orelse Props=="M") ->
    Atom = list_to_atom(string:to_lower(Props)),
    ParsedKey = parse_code(Code),
    Entry = {ParsedKey, Atom},
    Forms = [{nfc_qc,  "NFC_QC"}
            ,{nfd_qc,  "NFD_QC"}
            ,{nfkc_qc, "NFKC_QC"}
            ,{nfkd_qc, "NFKD_QC"}
            ],
    {ok, [{Type, case Form of FormName -> Entry; _ -> skip end}
          || {Type, FormName} <- Forms]};
parse_fields(_Skip) ->
    skip.

%after_parse(Ets) ->
%    do_after(Ets),
%    ok.


%do_after([{_Name, Table} | Tail]) ->
%    ux_unidata_parser:expand_table(Table),
%    do_after(Tail);
%do_after([]) -> ok.


%% Access fun for any of the tables; `y' is the default value.
get_function(_Type, Table) ->
    ux_unidata_parser:expand_opt_fun(Table, y).

%%
%% Helpers
%%

%% "From..To" becomes a `{From, To}' pair, a single code a single value;
%% every code is converted with ?TO_INT.
parse_code(Code) ->
    case string:tokens(Code, "..") of
    [From, To] ->
        {?TO_INT(From), ?TO_INT(To)};
    [Single] when Single =:= Code ->
        ?TO_INT(Single)
    end.
63 | 
64 | 


--------------------------------------------------------------------------------
/src/unidata/ux_unidata_parser_scripts.erl:
--------------------------------------------------------------------------------
%%% Scripts.txt
 2 | %%% @private
 3 | -module(ux_unidata_parser_scripts).
 4 | -export([parse/1, types/0, get_function/2]).
 5 | 
 6 | -define(TO_INT(C), ux_unidata_parser:hex_to_int(C)).
 7 | -define(CLEAN(S), ux_unidata_parser:delete_spaces(S)).
 8 | 
 9 | types() ->
10 |     [script
11 |     ].
12 | 
13 | parse(In) ->
14 |     case In of
15 |     [] -> skip;
16 |     Data -> 
17 | 
18 |         case ux_unidata_parser:split($;, Data) of
19 |         [Code, Name] ->
20 |         Atom = list_to_atom(?CLEAN(Name)),
21 |         ParsedKey = parse_code(?CLEAN(Code)),
22 |         {ok, 
23 |         [{script,  {ParsedKey, Atom}}
24 |         ]};
25 |         _Skip -> skip
26 |         end
27 |     end.
28 | 
29 | 
30 | get_function(script, Table) ->
31 |     DefValue = 'Unknown',
32 |     ux_unidata_parser:expand_fun(Table, DefValue).
33 | 
34 | %%
35 | %% Helpers
36 | %%
37 | parse_code(Code) -> case string:tokens(Code, "..") of
38 |     [From, To]  -> {?TO_INT(From), ?TO_INT(To)};
39 |     [Code]      -> ?TO_INT(Code)
40 |     end.
41 | 
42 | 


--------------------------------------------------------------------------------
/src/unidata/ux_unidata_parser_unidata.erl:
--------------------------------------------------------------------------------
  1 | %%% Example:
  2 | %%% ux_unidata_filelist:get_pid({unidata, [ccc], code:priv_dir(ux) ++ "/UNIDATA/UnicodeData.txt"}).
  3 | %%% @private
  4 | %%% http://www.ksu.ru/eng/departments/ktk/test/perl/lib/unicode/UCDFF301.html
  5 | -module(ux_unidata_parser_unidata).
  6 | -export([parse/1, types/0, get_function/2]).
  7 | 
  8 | types() ->
  9 |     [to_upper
 10 |     ,to_lower
 11 |     ,is_upper
 12 |     ,is_lower
 13 |     ,type       % General Category
 14 |     ,is_compat
 15 |     ,decomp     % Character Decomposition Mapping
 16 |     ,comp_tag   % compatibility formatting tag
 17 |     ,comp   
 18 |     ,comment 
 19 |     ,ccc        % Canonical Combining Classes
 20 |     ,w3         % Tertiary Weight Assignments (Case or Kana Subtype)
 21 |     ].
 22 | 
 23 | %% w3
 24 | %% http://unicode.org/reports/tr10/#Tertiary_Weight_Table
 25 | 
 26 | 
 27 | parse(In) ->
 28 |     Tokens = ux_unidata_parser:split($;, In),
 29 |     [Code,Comment,Abbr,Ccc,_,DecompMap,_,_,_,_,_,_,UC,LC|_] = Tokens,
 30 |     Compat = case DecompMap of [$<|_] -> true; _ -> false end,
 31 |     Dec = case DecompMap of 
 32 |         [_|_] -> ux_unidata_parser:from_hex(DecompMap);
 33 |         _ -> []
 34 |         end,
 35 | 
 36 |     case ux_unidata_parser:hex_to_int(Code) of 
 37 |     false -> skip;
 38 |     Char -> 
 39 |         Excl = ux_unidata:is_comp_excl(Char),
 40 |         {ok, 
 41 |         [{to_upper, case ux_unidata_parser:hex_to_int(UC) of 
 42 |                         false -> skip; V -> {Char, V} end}
 43 |         ,{to_lower, case ux_unidata_parser:hex_to_int(LC) of
 44 |                         false -> skip; V -> {Char, V} end}
 45 |         ,{is_upper, case Abbr of "Lu" -> {Char}; _ -> skip end}
 46 |         ,{is_lower, case Abbr of "Ll" -> {Char}; _ -> skip end}
 47 | 
 48 |         ,{type,     case Abbr of
 49 |                         [] -> skip; V -> {Char, list_to_atom(V)} end}
 50 | 
 51 |         ,{is_compat, case Compat of true -> {Char}; false -> skip end}
 52 | 
 53 |         ,{decomp,  case Dec of 
 54 |                     [_|_] -> {Char, Dec}; _ -> skip end}
 55 | 
 56 |         ,{comp,    case Dec of 
 57 |                     [D1,D2] when (not Excl)   and is_integer(D1)
 58 |                              and (not Compat) and is_integer(D1)
 59 |                        -> {{D1, D2}, Char}; 
 60 |                     _  -> skip end}
 61 | 
 62 |         ,{w3, case parse_comment(Comment) of
 63 |             skip -> skip;
 64 |             W3Type -> {Char, W3Type} end}
 65 | 
 66 |         ,{comp_tag, case Compat of
 67 |             true -> {Char, parse_tag(DecompMap)};
 68 |             false -> skip end}
 69 | 
 70 |         ,{comment, case Comment of 
 71 |                    [] -> skip;
 72 |                    _  -> {Char, list_to_binary(Comment)} end}
 73 | 
 74 |         ,{ccc,     case string:to_integer(Ccc) of 
 75 |                    {Int, []} when Int>0 -> {Char, Int}; _ -> skip end}
 76 |         ]}
 77 |     end.
 78 |     
 79 | get_function(w3, Table) -> 
 80 |     DefValue = false,
 81 |     ux_unidata_parser:ets_fun(Table, DefValue);
 82 | get_function(comp_tag, Table) -> 
 83 |     DefValue = false,
 84 |     ux_unidata_parser:ets_fun(Table, DefValue);
 85 | get_function(ccc, Table) -> 
 86 |     DefValue = 0,
 87 |     ux_unidata_parser:ets_fun(Table, DefValue);
 88 | get_function(is_upper, Table) -> 
 89 |     ux_unidata_parser:bool_fun(Table);
 90 | get_function(is_lower, Table) -> 
 91 |     ux_unidata_parser:bool_fun(Table);
 92 | get_function(is_compat, Table) -> 
 93 |     ux_unidata_parser:bool_fun(Table);
 94 | get_function(type, Table) -> 
 95 |     DefValue = other,
 96 |     ux_unidata_parser:ets_fun(Table, DefValue);
 97 | get_function(comment, Table) -> 
 98 |     DefValue = <<>>,
 99 |     ux_unidata_parser:ets_fun(Table, DefValue);
100 | get_function(comp, Table) -> 
101 |     DefValue = false,
102 |     ux_unidata_parser:ets_fun(Table, DefValue);
103 | get_function(decomp, Table) -> 
104 |     DefValue = [],
105 |     ux_unidata_parser:ets_fun(Table, DefValue);
106 | get_function(to_upper, Table) -> 
107 |     DefValue = noop, % fun(C) -> C.
108 |     ux_unidata_parser:ets_fun(Table, DefValue);
109 | get_function(to_lower, Table) -> 
110 |     DefValue = noop, % fun(C) -> C.
111 |     ux_unidata_parser:ets_fun(Table, DefValue).
112 | 
113 | 
114 | 
115 | 
116 | 
117 | 
118 | 
119 | 
120 | 
121 | %%
122 | %% Helpers
123 | %%
124 | 
%% Extract the tag of a mapping that starts with `<': the characters
%% between the leading `<' and the first `>' are returned as an atom.
%% Fails with function_clause when the string does not start with `<' or
%% has no closing `>'.
parse_tag([$< | Rest]) ->
    parse_tag_(Rest, "").

parse_tag_([Char | Rest], RevName) when Char =/= $> ->
    parse_tag_(Rest, [Char | RevName]);
parse_tag_([$> | _Mapping], RevName) ->
    list_to_atom(lists:reverse(RevName)).
129 | 
130 | 
%% Classify a comment by the kana phrase it contains. The first matching
%% rule wins, so the "... LETTER SMALL" phrases are tried before the
%% plain "... LETTER" ones. Returns `skip' when no phrase is found.
parse_comment(Comment) ->
    Rules =
        [{"HIRAGANA LETTER SMALL", small_hiragana}
        ,{"HIRAGANA LETTER",       normal_hiragana}
        ,{"KATAKANA LETTER SMALL", small_katakana}
        ,{"KATAKANA LETTER",       normal_katakana}
        ],
    Matches = [Type || {Phrase, Type} <- Rules,
                       string:str(Comment, Phrase) > 0],
    case Matches of
    [First | _] -> First;
    [] -> skip
    end.
143 | 


--------------------------------------------------------------------------------
/src/unidata/ux_unidata_parser_word_break_property.erl:
--------------------------------------------------------------------------------
%%% WordBreakProperty.txt
 2 | %%% @private
 3 | -module(ux_unidata_parser_word_break_property).
 4 | -export([parse/1, types/0, get_function/2]).
 5 | %       after_parse/1]).
 6 | 
 7 | -define(TO_INT(C), ux_unidata_parser:hex_to_int(C)).
 8 | 
 9 | types() ->
10 |     ['word_break_property'
11 |     ].
12 | 
13 | parse(In) ->
14 |     case In of
15 |     [] -> 'skip';
16 |     Data -> 
17 | 
18 |         case ux_unidata_parser:split($;, Data) of
19 |         [Code, Name] ->
20 |         Atom = list_to_atom(string:strip(Name)),
21 |         ParsedKey = parse_code(string:strip(Code)),
22 |         {ok, 
23 |         [{'word_break_property',  {ParsedKey, Atom}}
24 |         ]};
25 |         _Skip -> 'skip'
26 |         end
27 |     end.
28 | 
29 | 
30 | get_function('word_break_property', Table) ->
31 |     DefValue = 'Any',
32 |     ux_unidata_parser:expand_opt_fun(Table, DefValue).
33 | 
34 | %%
35 | %% Helpers
36 | %%
37 | parse_code(Code) -> case string:tokens(Code, "..") of
38 |     [From, To]  -> {?TO_INT(From), ?TO_INT(To)};
39 |     [Code]      -> ?TO_INT(Code)
40 |     end.
41 | 
42 | 


--------------------------------------------------------------------------------
/src/unidata/ux_unidata_server.erl:
--------------------------------------------------------------------------------
  1 | %%% @doc This module provides the access to the store of default files.
  2 | %%%      When client runs a function from the ux_unidata module:
%%%      1. Code from the ux_unidata_filelist module checks the process
%%%         dictionary and the application environment. If both are
%%%         undefined, it calls this server.
  6 | %%%      2. If this server already loaded this data, it returns it, and
  7 | %%%         the client code put it to the process dictionary.
  8 | %%%      3. If requested data is not loaded, then this server runs
  9 | %%%         an other server (ux_unidata_store), which parsed a default 
 10 | %%%         UNIDATA file.
 11 | %%% @end
 12 | %%%
 13 | %%% @private
 14 | -module(ux_unidata_server).
 15 | -include("ux.hrl").
 16 | 
 17 | -export([start_link/0]).
 18 | -export([init/1, terminate/2, 
 19 |     handle_call/3, handle_info/2, handle_cast/2]).
 20 | -export([set_default/1, get_default/1]).
 21 | 
 22 | -behavior(gen_server).
 23 | 
 24 | %% Exported Client Functions
 25 | %% Operation & Maintenance API
 26 | start_link() ->
 27 |     Arguments = [],
 28 |     Opts = [],
 29 |     gen_server:start_link({local, ?MODULE}, ?MODULE, Arguments, Opts).
 30 | 
 31 | init([]) ->
 32 |     ?DBG("~w~w: All default types were generated. ~n",
 33 |         [?MODULE, self()]),
 34 |     {ok, []}.
 35 | 
 36 | terminate(_Reason, _LoopData) ->
 37 |     ok.
 38 | 
 39 | 
 40 | 
 41 | 
 42 | %% Spawns process which waits result from ux_unidata_store.
 43 | spawn_waiter(LoaderFn, Key) ->
 44 |     spawn_monitor(fun() ->
 45 |         %% Run Long operation.
 46 |         ok = LoaderFn(),
 47 |         Reply = ux_unidata_filelist:get_source_from(process, Key),
 48 |         % Reply to ux_unidata_server.
 49 |         gen_server:cast(?MODULE, {waiter_reply, Key}),
 50 |         % Reply to clients.
 51 |         spawn_waiter_reply(Reply)
 52 |         end).
 53 | 
 54 | spawn_waiter_reply(ReplyVal) ->
 55 |     receive
 56 |     {reply_to, Pid} ->
 57 |         Pid ! {reply_result, ReplyVal},
 58 |         spawn_waiter_reply(ReplyVal)
 59 |     after 5000 ->
 60 |         ok
 61 |     end.
 62 | 
 63 | %% Runs from a client process.
 64 | wait_respond(WaiterPid) ->
 65 |     WaiterPid ! {reply_to, self()},
 66 |     {ok, Result} = 
 67 |         receive
 68 |         {reply_result, Val} -> {ok, Val}
 69 |         after 20000 -> 
 70 |             {error, timeout}
 71 |         end,
 72 |     Result.
 73 |     
 74 | check_key(Key) ->
 75 |     case erlang:get(Key) of
 76 |     Pid when is_pid(Pid) ->
 77 |         case erlang:is_process_alive(Pid) of
 78 |         true -> ok; % still working.
 79 |         false ->
 80 |         % a spawn_waiter process failed and dead.
 81 |         erlang:erase(Key),
 82 |         ok
 83 |         end;
 84 |     Fun when is_function(Fun) -> ok
 85 |     end.
 86 | 
 87 | 
 88 | 
 89 | 
 90 | % Handles the 'DOWN' message of a monitored waiter process.
 91 | % The process dictionary maps Ref to Key, and Key to a waiter pid or a fun.
 92 | % FromPid is the pid of the waiter process.
 93 | handle_info({'DOWN', Ref, process, FromPid, _Reason}, LoopData) ->
 94 |     ?DBG("~w: Delete Pid = ~w from the process dictionary. ~n", 
 95 |         [?MODULE, FromPid]),
 96 |     % Forget the monitor reference, then re-check the key it pointed to.
 97 |     case erlang:get(Ref) of
 98 |     undefined -> ok;
 99 |     Key -> erlang:erase(Ref), check_key(Key)
100 |     end,
101 |     {noreply, LoopData}.
102 | %% Sent by a waiter after its load: run load_default(Key) in the server too.
103 | handle_cast({waiter_reply, Key}, LoopData) ->
104 |     ok = load_default(Key),
105 |     {noreply, LoopData}.
106 | %% {get_default, Key}: replies with the fun stored for Key, or with the pid
107 | %% of the waiter that loads it (a waiter is spawned when there is none).
108 | %% I am using PD as a proxy (it is bad).
109 | handle_call({get_default, Key}, _From, LoopData) ->
110 |     case ux_unidata_filelist:get_source_from(process, Key) of
111 |     undefined -> 
112 |         LoaderFn = fun() -> 
113 |             load_default(Key) 
114 |             end,
115 |         {WaiterPid, Ref} = spawn_waiter(LoaderFn, Key),
116 |         put(Key, WaiterPid),
117 |         put(Ref, Key),
118 |         Reply = WaiterPid,
119 |         {reply, Reply, LoopData};
120 | 
121 |     Fun when is_function(Fun) -> 
122 |         case Fun('test') of
123 |         true ->
124 |             {reply, Fun, LoopData};
125 | 
126 |         % Restart the "dead" process, reload the function
127 |         false ->
128 |             LoaderFn = fun() -> 
129 |                 Fun('reload')
130 |                 end,
131 |             {WaiterPid, Ref} = spawn_waiter(LoaderFn, Key),
132 |             put(Key, WaiterPid),
133 |             put(Ref, Key),
134 |             {reply, WaiterPid, LoopData}
135 |         end;
136 | 
137 |     %% We are still waiting.
138 |     WaiterPid when is_pid(WaiterPid) -> 
139 |         {reply, WaiterPid, LoopData}
140 |     end;
141 | %% {set_default, Key}: replies with the result of set_source(process, Key).
142 | handle_call({set_default, Key}, _From, LoopData) ->
143 |     Reply = ux_unidata_filelist:set_source(process, Key),
144 |     {reply, Reply, LoopData}.
145 | 
146 | %%
147 | %% API
148 | %%
149 | 
150 | %% Returns the value registered for Key.  A fun sent by the server is cached
151 | %% in the caller's process dictionary; otherwise the waiter's answer is used.
152 | get_default(Key) ->
153 |     case gen_server:call(?MODULE, {get_default, Key}, 60000) of
154 |     WaiterPid when is_pid(WaiterPid) ->
155 |         wait_respond(WaiterPid);
156 |     Fun when is_function(Fun) ->
157 |         put(Key, Fun), % Register in the dictionary of the local process.
158 |         Fun
159 |     end.
160 | %% Calls the server with {set_default, Key} and returns its reply.
161 | set_default(Key) ->
162 |     gen_server:call(?MODULE, {set_default, Key}, 60000).
163 | 
164 | 
165 | %%
166 | %% Private helpers
167 | %% 
168 | 
169 | %% Loads every "column" (type) of the parser's source file in one pass,
170 | %% because it is faster than reading the file once per type.
171 | %% The type named in the key is therefore not used.
172 | %%
173 | %% This function is LONG.
174 | load_default({Parser, _Type}) ->
175 |     SourceFile = ux_unidata:get_source_file(Parser),
176 |     LoadTypes = all,
177 |     ux_unidata_filelist:set_source(process, Parser, LoadTypes, SourceFile).
178 | 
179 | 


--------------------------------------------------------------------------------
/src/unidata/ux_unidata_store.erl:
--------------------------------------------------------------------------------
  1 | %%% @private
  2 | -module(ux_unidata_store).
  3 | -include("ux.hrl").
  4 | 
  5 | -export([start_link/2]).
  6 | -export([init/1, terminate/2, handle_call/3, handle_cast/2, handle_info/2]).
  7 | -export([monitor_client_process/2, get_funs/2]).
  8 | 
  9 | % First argument is a server pid.
 10 | -export([check_types/2, table_list/1, remove_type/2]).
 11 | 
 12 | % These functions are not tied to the server process:
 13 | % they are called from different processes.
 14 | -export([get_env/1]).
 15 | 
 16 | -behavior(gen_server).
 17 | -record(state, {
 18 |     clients = [] :: [pid()],
 19 |     ets = [] :: [{atom(), integer()}],
 20 |     funs = [] :: [{atom(), fun()}],
 21 |     parser :: atom(),
 22 |     types = all :: atom() | [atom()],
 23 |     remain = [] :: [atom()],
 24 |     filename :: string()
 25 | }).
 26 | 
 27 | %% Exported Client Functions
 28 | %% Operation & Maintenance API
 29 | %%
 30 | %% Runs in the client process and starts a store server linked to it.
 31 | start_link(File, ClientPid) ->
 32 |     % Check a parser module and types.
 33 |     ok = ux_unidata_parser:check(File),
 34 | 
 35 |     % The env variables are read here, in the client code, and are
 36 |     % handed to init/1, which installs them in the store process.
 37 |     Env = get_env(File),
 38 |     gen_server:start_link(?MODULE, [File, ClientPid, Env], []).
 39 | 
 40 | 
 41 | %% gen_server callback; runs in the new store process.
 42 | init([{_ParserType, _Types, _FileName} = File, ClientPid, ClientEnv]) ->
 43 |     % Install the data extracted from the client.
 44 |     set_env(File, ClientEnv),
 45 | 
 46 |     % Register the pid of this server.
 47 |     % PS: reg_pid uses env, so it has to follow set_env.
 48 |     ux_unidata_filelist:reg_pid(File, self()),
 49 | 
 50 |     set_monitor(ClientPid),
 51 | 
 52 |     % The parser is started by a cast to self(), i.e. after init/1
 53 |     % has returned (escapes deadlocks).
 54 |     ok = gen_server:cast(self(), {run_parser, File}),
 55 | 
 56 |     % ClientPid is the first registered client.
 57 |     InitialClients = [ClientPid],
 58 |     {ok, #state{clients = InitialClients}}.
 59 | %% gen_server callback: nothing is cleaned up explicitly here.
 60 | terminate(_Reason, _LoopData) ->
 61 |     ok.
 62 | 
 63 | %% {check_types, Types}: nothing remains to be generated, reply ok.
 64 | handle_call({check_types, _Types}, _From, 
 65 |     #state{remain=[]} = LoopData) ->
 66 |     ?DBG("~w~w: All types were already generated. ~n", 
 67 |         [?MODULE, self()]),
 68 |     Reply = ok,
 69 |     {reply, Reply, LoopData};
 70 | % {check_types, all}: generate every type that still remains.
 71 | handle_call({check_types, all}, From, 
 72 |     #state{remain=Types} = LoopData) ->
 73 |     handle_call({check_types, Types}, From, LoopData);
 74 | handle_call({check_types, Types}, _From, 
 75 |     #state{
 76 |         types=RegistredTypes, 
 77 |         parser=ParserType, 
 78 |         filename=FileName,
 79 |         remain=RemTypes,
 80 |         funs=Funs,
 81 |         ets=Ets} = LoopData) ->
 82 |     NewRemTypes = RemTypes -- Types, % unchanged => nothing requested remains
 83 |     {Reply, NewLoopData} = 
 84 |         case NewRemTypes == RemTypes of
 85 |         true  -> 
 86 |             ?DBG("~w~w: Types were already generated. ~n", 
 87 |                 [?MODULE, self()]),
 88 |             {ok, LoopData};
 89 |         false ->
 90 |             % NOTE(review): `--' needs a list here, but the record also
 90 |             % allows types = all - confirm that this cannot happen.
 90 |             AddTypes = Types -- RegistredTypes,
 91 |             File = {ParserType, AddTypes, FileName},
 92 |             {ok, AddedEts, _RemTypesWithRegistred} = 
 93 |                 ux_unidata_parser:run(File),
 94 |             AddedFuns = ux_unidata_parser:get_functions(ParserType, AddedEts),
 95 |             {ok, LoopData#state{
 96 |                     types=AddTypes ++ RegistredTypes,
 97 |                     ets=AddedEts ++ Ets,
 98 |                     remain=NewRemTypes,
 99 |                     funs=AddedFuns ++ Funs
100 |                 }}
101 |         end,    
102 |     {reply, Reply, NewLoopData};
103 | %% {monitor_client_pid, Pid}: monitor and remember a client not seen before.
104 | handle_call({monitor_client_pid, ClientPid}, _From, 
105 |     #state{clients=Clients} = LoopData) ->
106 |     NewLoopData = case lists:member(ClientPid, Clients) of
107 |         true -> LoopData;
108 |         false ->
109 |             set_monitor(ClientPid),
110 |             NewClients = [ClientPid | Clients],
111 |             LoopData#state{clients=NewClients}
112 |         end,
113 |     Reply = ok,
114 |     {reply, Reply, NewLoopData};
115 | %% {get_funs, all}: reply with the whole list of funs.
116 | %% {get_funs, Types}: reply with the entries of Types (see get_elems/2).
117 | handle_call({get_funs, all}, _From, 
118 |     #state{funs=Funs} = LoopData) ->
119 |     Reply = Funs,
120 |     {reply, Reply, LoopData};
121 | handle_call({get_funs, Types}, _From, 
122 |     #state{funs=Funs} = LoopData) ->
123 |     ?DBG("~w~w: Try get the list of the functions: ~w. ~n", 
124 |         [?MODULE, self(), Funs]),
125 |     Reply = get_elems(Types, Funs),
126 |     {reply, Reply, LoopData};
127 | %% table_list: reply with {ok, Ets}, the list kept in the state.
128 | handle_call(table_list, _From, 
129 |     #state{ets=Ets} = LoopData) ->
130 |     Reply = {ok, Ets},
131 |     {reply, Reply, LoopData}.
132 | 
133 | %% A monitored client died: forget it.  When no client is left, a
134 | %% delete_timeout message is scheduled and handled by the clauses below.
135 | handle_info({'DOWN', _Ref, process, FromPid, _Reason}, 
136 |     #state{clients=Clients} = LoopData) ->
137 |     ?DBG("~w~w: Delete the process ~w from the process list: ~w. ",
138 |         [?MODULE, self(), FromPid, Clients]),
139 |     Remaining = Clients -- [FromPid],
140 |     if
141 |         Remaining =:= [] -> % wait 15 seconds and stop the server.
142 |             Timeout = 15000,
143 |             ?DBG("~w~w: Nobody use this server and ETS table. "
144 |                     "Wait ~w ms and stop. ~n", 
145 |                 [?MODULE, self(), Timeout]),
146 |             timer:send_after(Timeout, self(), delete_timeout);
147 |         true -> ok
148 |     end,
149 |     {noreply, LoopData#state{clients=Remaining}};
150 | handle_info(delete_timeout, #state{clients=[]} = Idle) ->
151 |     ?DBG("~w~w: Nobody use this server and ETS table. Stop. ~n", 
152 |         [?MODULE, self()]),
153 |     {stop, normal, Idle};
154 | % We have new clients.
155 | handle_info(delete_timeout, Busy) ->
156 |     ?DBG("~w~w: New users use this server. Cancel stop. ~n", 
157 |         [?MODULE, self()]),
158 |     {noreply, Busy}.
159 | 
160 | 
161 | handle_cast({run_parser, {ParserType, Types, FileName} = File},
162 |     #state{ets=[]} = LoopData) ->
163 |     % Run parser (first run only: the state holds no tables yet).
164 |     {ok, Ets, RemTypes} = ux_unidata_parser:run(File),
165 |     Funs = ux_unidata_parser:get_functions(ParserType, Ets),
166 |     ?DBG("~w~w: Init. Parser ~w generated ets: ~w and funs: ~w. ~n", 
167 |         [?MODULE, self(), ParserType, Ets, Funs]),
168 |     NewLoopData = LoopData#state{
169 |         ets    = Ets,
170 |         types  = Types,
171 |         parser = ParserType,
172 |         filename = FileName,
173 |         remain = RemTypes,
174 |         funs   = Funs
175 |         },
176 |     {noreply, NewLoopData};
177 | %% Deletes the ETS table and the fun of Type; Type is added to `remain'.
178 | handle_cast({remove_type, Type}, 
179 |     #state{ets=Ets, types=Types, remain=Remain, funs=Funs} = LoopData) ->
180 |     NewLoopData = case lists:keyfind(Type, 1, Ets) of
181 |         false -> LoopData;
182 |         {Type, Table} -> 
183 |             true = ets:delete(Table),
184 |             NewEts = lists:keydelete(Type, 1, Ets),
185 |             NewFuns = lists:keydelete(Type, 1, Funs),
186 |             % `types' may still be the atom all (see #state{}), on which
187 |             % lists:delete/2 crashes: then list the types of the kept tables.
188 |             NewTypes = case is_list(Types) of
189 |                 true  -> lists:delete(Type, Types);
190 |                 false -> [T || {T, _Tab} <- NewEts]
191 |                 end,
192 |             LoopData#state{ets=NewEts,
193 |                 types=NewTypes,
194 |                 remain=[Type | Remain],
195 |                 funs=NewFuns}
196 |         end,
197 |     {noreply, NewLoopData}.
198 | %% Asks the store server to monitor the process ClientPid.
199 | %% ServerPid is a pid of gen_server with ETS.
200 | %% If all clients die then gen_server dies.
201 | %% This function is called by fun ux_unidata_filelist:get_pid/2.
202 | monitor_client_process(ServerPid, ClientPid) ->
203 |     ok = gen_server:call(ServerPid, {monitor_client_pid, ClientPid}).
204 | %% Uses the default gen_server:call timeout; anything but ok is a badmatch.
205 | %% If all types are not on the server then try to generate them.
206 | check_types(ServerPid, Types) ->
207 |     ok = gen_server:call(ServerPid, {check_types, Types}).
208 | %% Asynchronously asks the server to drop the table and the fun of Type.
209 | remove_type(ServerPid, Type) ->
210 |     ok = gen_server:cast(ServerPid, {remove_type, Type}).
211 | %% Returns the `ets' list kept in the server state.
212 | table_list(ServerPid) ->
213 |     {ok, TableList} = gen_server:call(ServerPid, table_list),
214 |     TableList.
215 | %% Returns the fun entries of Types (or of `all'); the call waits up to 30 s.
216 | get_funs(ServerPid, Types) ->
217 |     gen_server:call(ServerPid, {get_funs, Types}, 30000).
218 | 
219 | 
220 | 
221 | %% Picks the tuple keyed by each requested type, in the order of Types.
222 | %% A type that is missing from Elems fails with {badmatch, false}.
223 | get_elems(Types, Elems) ->
224 |     [begin
225 |          El = lists:keyfind(Type, 1, Elems),
226 |          true = El =/= false,
227 |          El
228 |      end || Type <- Types].
229 | 
230 | %% Monitors ClientPid from the calling process; returns the monitor ref.
231 | set_monitor(ClientPid) ->
232 |     ?DBG("~w~w: Set the monitor on the process ~w. ~n", 
233 |         [?MODULE, self(), ClientPid]),
234 |     erlang:monitor(process, ClientPid).
235 |     
236 | 
237 | 
238 | 
239 | %%
240 | %% Helpers
241 | %%
242 | %% Thin wrapper around ux_unidata_parser:get_env/1.
243 | get_env(File) ->
244 |     ux_unidata_parser:get_env(File).
245 | %% Thin wrapper around ux_unidata_parser:set_env/2.
246 | set_env(File, Env) ->
247 |     ux_unidata_parser:set_env(File, Env).
248 | 


--------------------------------------------------------------------------------
/src/unidata/ux_unidata_store_sup.erl:
--------------------------------------------------------------------------------
 1 | %%% @private
 2 | -module(ux_unidata_store_sup).
 3 | -behavior(supervisor).
 4 | -export([start_link/0]).
 5 | -export([init/1]).
 6 | -export([read_file/2]).
 7 | -export([restart/0]).
 8 | %% Starts the supervisor, registered locally under the module name.
 9 | start_link() ->
10 |     supervisor:start_link({local, ?MODULE}, ?MODULE, []).
11 | %% simple_one_for_one: temporary ux_unidata_store workers, added on demand.
12 | init([]) ->
13 |     ChildSpec = {ux_unidata_store, 
14 |                 {ux_unidata_store, start_link, []},
15 |         temporary, 2000, worker, [ux_unidata_store]},
16 |     {ok, {{simple_one_for_one,0,1}, [ChildSpec]}}.
17 | 
18 | %% @doc Read file with UNIDATA.
19 | %%      Filename is {parser, types, filename}.
20 | %%      Starts a ux_unidata_store child and returns {ok, ServerPid}.
21 | read_file({_,_,_} = Filename, ClientPid) when is_pid(ClientPid) ->
22 |     ChildArgs = [Filename, ClientPid],
23 |     {ok, _ServerPid} = supervisor:start_child(?MODULE, ChildArgs).
24 | 
25 | %% @doc Restart this supervisor by killing the registered process.
26 | %%      NOTE(review): nothing here starts it again; presumably its parent does.
27 | restart() ->
28 |     exit(whereis('ux_unidata_store_sup'), 'kill').
29 | 
30 | 


--------------------------------------------------------------------------------
/src/utils/ux_opt_ranges.erl:
--------------------------------------------------------------------------------
  1 | %% @doc Functions for working with ranges in lists.
  2 | %%
  3 | %%      ETS is fast only as a key-value store.
  4 | %%      But some data files contains ranges: From..To.
  5 | %%      The fastest way is using lists for storing this values.
  6 | %%
  7 | %%      There are two types of these lists:
  8 | %%      * with booleans: `[{1,3}, 6, {8,9}]'. For example, `is_compat';
  9 | %%      * with values: `[{{1,3}, value1}, {{4,12}, value2}]'.
 10 | %%
 11 | %%      `in_list' function is for the first type.
 12 | %%      `search' function is for the second type.
 13 | %%
 14 | %% @end
 15 | 
 16 | -module(ux_opt_ranges).
 17 | -export([in_list/1, search/2]).
 18 | 
 19 | 
 20 | %% Builds a membership fun for a list of integers and {From,To} ranges.
 21 | %% The items are spread over 651 buckets (see index/1), so a lookup
 22 | %% scans only the short list of its own bucket.  An empty list is now
 23 | %% accepted too and gives a fun that always returns false.
 24 | %% The items keep their input order: the in_list_sort/1 pass that was
 25 | %% run here before had its result discarded, so it was dropped.
 26 | in_list(V) when is_list(V) ->
 27 |     do_in_list(V, erlang:make_tuple(651, [])).
 28 | 
 29 | %% Builds a lookup fun for a list of {Key, Value} pairs, where Key is an
 30 | %% integer or a {From,To} range.  Pairs whose value is Def are not stored;
 31 | %% the fun returns Def when nothing matches.
 32 | %% The pairs keep their input order, so the first matching pair wins.
 33 | %% The search_sort/1 pass that was run here before had its result
 34 | %% discarded, so it was dropped.
 35 | search(Def, V) ->
 36 |     do_search(Def, V, erlang:make_tuple(651, [])).
 37 | 
 38 | 
 39 | %% Distributes the items over the buckets of the tuple R and returns
 40 | %% the membership fun once the input is exhausted.
 41 | 
 42 | % A range is stored in every bucket it touches.
 43 | do_in_list([{From, To} = Range | Rest], R) ->
 44 |     Filled = fill_elem(index(From), index(To), Range, R),
 45 |     do_in_list(Rest, Filled);
 46 | 
 47 | do_in_list([Single | Rest], R) ->
 48 |     do_in_list(Rest, set_elem(Single, Single, R));
 49 | 
 50 | do_in_list([], R) ->
 51 |     % Items were consed, so each bucket is reversed to restore the
 52 |     % input order.
 53 |     Buckets = erlang:list_to_tuple(
 54 |         [lists:reverse(B) || B <- erlang:tuple_to_list(R)]),
 55 | 
 56 |     fun(X) ->
 57 |         ux_ranges:in_list(erlang:element(index(X), Buckets), X)
 58 |         end.
 59 |     
 60 |     
 61 | 
 62 | %% Distributes {Key, Value} pairs over the buckets of the tuple R and
 63 | %% returns the lookup fun once the input is exhausted.
 64 | 
 65 | % Pairs holding the default value are not stored.
 66 | do_search(Def, [{_Key, Def} | Rest], R) ->
 67 |     do_search(Def, Rest, R);
 68 | 
 69 | % A range is stored in every bucket it touches.
 70 | do_search(Def, [{{From, To}, _Val} = Pair | Rest], R) ->
 71 |     Filled = fill_elem(index(From), index(To), Pair, R),
 72 |     do_search(Def, Rest, Filled);
 73 | 
 74 | do_search(Def, [{Single, _Val} = Pair | Rest], R) ->
 75 |     do_search(Def, Rest, set_elem(Single, Pair, R));
 76 | 
 77 | do_search(Def, [], R) ->
 78 |     % Items were consed, so each bucket is reversed to restore the
 79 |     % input order.
 80 |     Buckets = erlang:list_to_tuple(
 81 |         [lists:reverse(B) || B <- erlang:tuple_to_list(R)]),
 82 | 
 83 |     fun(X) ->
 84 |         Bucket = erlang:element(index(X), Buckets),
 85 |         case ux_ranges:search(Bucket, X) of
 86 |         false -> Def;
 87 |         Found -> Found
 88 |         end
 89 |     end.
 90 | 
 91 |     
 92 |     
 93 | 
 94 | 
 95 | %% Conses V onto the bucket of R that index(H) selects.
 96 | set_elem(H, V, R) when is_tuple(R) ->
 97 |     Pos = index(H),
 98 |     Bucket = [V | erlang:element(Pos, R)],
 99 |     erlang:setelement(Pos, R, Bucket).
100 | 
101 |     
102 | %% Conses V onto the I-th bucket of the tuple R.
103 | set_elem_i(I, V, R) when is_tuple(R) ->
104 |     Bucket = [V | erlang:element(I, R)],
105 |     erlang:setelement(I, R, Bucket).
106 | 
107 | 
108 | %% Conses V onto every bucket of R from I1 up to I2 inclusive.
109 | %% Only I1 =< I2 is accepted; a reversed pair of indexes fails with
110 | %% function_clause.
111 | fill_elem(I1, I2, V, R) when I1 =< I2 ->
112 |     Store = fun(I, Acc) -> set_elem_i(I, V, Acc) end,
113 |     lists:foldl(Store, R, lists:seq(I1, I2)).
114 | 
115 | 
116 | %% Bucket number of N: 100 values per bucket, counted from 1; every
117 | %% value above 65000 goes to the last bucket, 651.
118 | index(N) when N =< 65000 -> (N div 100) + 1;
119 | index(_N) -> 651.
120 | 
121 | 
122 | %% Sorts integers and {From,To} ranges by their first value.
123 | %% Each item is tagged with its sort key, the tagged list is sorted
124 | %% and the tags are dropped again.
125 | in_list_sort(V) ->
126 |     Tagged = [case Item of
127 |                   {From, _To} -> {From, Item};
128 |                   From        -> {From, From}
129 |               end || Item <- V],
130 |     Sorted = lists:sort(Tagged),
131 | 
132 |     % Drop the tags.
133 |     [Item || {_From, Item} <- Sorted].
134 |     
135 | 
136 | 
137 | %% Sorts {Key, Value} pairs, where Key is an integer or a {From,To}
138 | %% range, by the first value of the key.
139 | %%
140 | %% Fix: a pair with an integer key used to be replaced by the bare key
141 | %% (its value was lost); now every pair is returned unchanged.
142 | search_sort(V) ->
143 |     Tag = fun(Pair) ->
144 |             case erlang:element(1, Pair) of
145 |             {From, _To} -> {From, Pair};
146 |             From        -> {From, Pair}
147 |             end
148 |         end,
149 |     Tagged = lists:sort(lists:map(Tag, V)),
150 |     % Drop the tags.
151 |     [Pair || {_From, Pair} <- Tagged].
152 | 
153 | 
154 | 
155 | 
156 | 
157 | 
158 | 
159 | 


--------------------------------------------------------------------------------
/src/utils/ux_ranges.erl:
--------------------------------------------------------------------------------
 1 | %% @doc Functions for working with ranges in lists.
 2 | %%
 3 | %%      ETS is fast only as a key-value store.
 4 | %%      But some data files contains ranges: From..To.
 5 | %%      The fastest way is using lists for storing this values.
 6 | %%
 7 | %%      There are two types of these lists:
 8 | %%      * with booleans: `[{1,3}, 6, {8,9}]'. For example, `is_compat';
 9 | %%      * with values: `[{{1,3}, value1}, {{4,12}, value2}]'.
10 | %%
11 | %%      `in_list' function is for the first type.
12 | %%      `search' function is for the second type.
13 | %%
14 | %% @end
15 | 
16 | -module(ux_ranges).
17 | -export([in_list/2,search/2]).
18 | 
19 | %% True when V equals an integer item or lies inside a {From, To} item
20 | %% (both bounds are inclusive); false for an empty list.
21 | -spec in_list([{integer(), integer()} | integer()], integer()) -> 
22 |     boolean().
23 | in_list(Items, V) ->
24 |     Hit = fun({From, To}) ->
25 |                 V >= From andalso V =< To;
26 |              (Item) ->
27 |                 is_integer(Item) andalso Item =:= V
28 |           end,
29 |     lists:any(Hit, Items).
30 | 
31 | %% Returns the value paired with the first key that equals V or whose
32 | %% {From, To} range contains V; false when there is no such key.
33 | -spec search([{{integer(), integer()} | integer(), term()}], integer()) -> 
34 |     term() | false.
35 | search([{H,P}|_T], H) when is_integer(H) ->
36 |     P;
37 | search([{{From, To},P}|_T], V) when V >= From, V =< To ->
38 |     P;
39 | search([_H|T], V) ->
40 |     search(T, V);
41 | search([], _V) -> false.
42 | 
43 | 
44 | 


--------------------------------------------------------------------------------
/src/ux.app.src:
--------------------------------------------------------------------------------
 1 | %% -*- erlang -*-
 2 | {application, ux,
 3 |  [{description, "ux"},
 4 |   {vsn, "3.0.1"},
 5 |   {modules, [ux_unidata, 
 6 |             ux_char, 
 7 |             ux_string,
 8 |             ux_uca,
 9 |  
10 |             ux_html]},
11 |   {registered, []},
12 |   {mod, {'ux_app', []}},
13 |   {env, []},
14 |   {applications, [kernel, stdlib, metamodule]}]}.
15 | 


--------------------------------------------------------------------------------
/src/ux.erl:
--------------------------------------------------------------------------------
 1 | %% @author Uvarov Michael <arcusfelis@gmail.com>
 2 | %% @copyright 2010 ux Uvarov Michael <arcusfelis@gmail.com>
 3 | 
 4 | %% @doc ux.
 5 | %% @private
 6 | 
 7 | -module(ux).
 8 | -author("Uvarov Michael <arcusfelis@gmail.com>").
 9 | -export([start/0, stop/0]).
10 | 
11 | -define(APP, ux).
12 | 
13 | %% @spec start() -> ok | {error, Reason}
14 | %% @doc Start the ux application after the applications it depends on.
15 | start() ->
16 |     application:load(?APP),
17 |     {ok, Deps} = application:get_key(?APP, applications),
18 |     true = lists:all(fun ensure_started/1, Deps), % start the dependencies
19 |     ux_deps:ensure(),
20 |     application:start(ux).
21 | 
22 | 
23 | %% @spec stop() -> ok | {error, Reason}
24 | %% @doc Stop the ux application.
25 | stop() ->
26 |     application:stop(ux).
27 | %% Starts App; true when it is running afterwards.  On failure the error
28 | %% is logged and returned as it is (it is not the atom false).
29 | ensure_started(App) ->
30 |     case application:start(App) of
31 |         ok ->
32 |             true;
33 |         {error, {already_started, App}} ->
34 |             true;
35 |         Else ->
36 |             error_logger:error_msg("Couldn't start ~p: ~p", [App, Else]),
37 |             Else
38 |     end.
39 | 
40 | 


--------------------------------------------------------------------------------
/src/ux.hrl:
--------------------------------------------------------------------------------
  1 | % vim: set filetype=erlang shiftwidth=4 tabstop=4 expandtab tw=80:
  2 | %%% =====================================================================
  3 | %%% This library is free software; you can redistribute it and/or modify
  4 | %%% it under the terms of the GNU Lesser General Public License as
  5 | %%% published by the Free Software Foundation; either version 2 of the
  6 | %%% License, or (at your option) any later version.
  7 | %%%
  8 | %%% This library is distributed in the hope that it will be useful, but
  9 | %%% WITHOUT ANY WARRANTY; without even the implied warranty of
 10 | %%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 11 | %%% Lesser General Public License for more details.
 12 | %%%
 13 | %%% You should have received a copy of the GNU Lesser General Public
 14 | %%% License along with this library; if not, write to the Free Software
 15 | %%% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 16 | %%% USA
 17 | %%%
 18 | %%% $Id$
 19 | %%%
 20 | %%% @copyright 2010-2011 Michael Uvarov
 21 | %%% @author Michael Uvarov <arcusfelis@gmail.com>
 22 | %%% @see ux
 23 | %%% @end
 24 | %%% =====================================================================
 25 | 
 26 | %-define(SLOW_TESTS, true).
 27 | %-define(UNIDATA_DEBUG, false).
 28 | 
 29 | -ifdef(UNIDATA_DEBUG).
 30 | -define(DBG(X,Y), error_logger:info_msg(X,Y)).
 31 | -else.
 32 | -define(DBG(X,Y), ok).
 33 | -endif.
 34 | 
 35 | 
 36 | -define(UNIDATA_VERSION, "UNIDATA").
 37 | -define(UCADATA_VERSION, "UCA").
 38 | 
 39 | -define(UNIDATA, ux_unidata).
 40 | 
 41 | 
 42 | % Defines Hangul constants
 43 | % Hangul characters can be decomposed to LV or LVT forms.
 44 | 
 45 | -define(HANGUL_SBASE,  16#AC00).
 46 | -define(HANGUL_LBASE,  16#1100). % 4352 - 4371
 47 | -define(HANGUL_VBASE,  16#1161). % 4449 - 4470
 48 | -define(HANGUL_TBASE,  16#11A7). % 4519 - 4547
 49 | -define(HANGUL_LCOUNT, 19).
 50 | -define(HANGUL_VCOUNT, 21).
 51 | -define(HANGUL_TCOUNT, 28).
 52 | -define(HANGUL_NCOUNT, 588).
 53 | -define(HANGUL_SCOUNT, 11172).
 54 | % Each *LAST value is *BASE + *COUNT.
 55 | -define(HANGUL_SLAST,  (?HANGUL_SBASE + ?HANGUL_SCOUNT)).
 56 | -define(HANGUL_LLAST,  (?HANGUL_LBASE + ?HANGUL_LCOUNT)).
 57 | -define(HANGUL_VLAST,  (?HANGUL_VBASE + ?HANGUL_VCOUNT)).
 58 | -define(HANGUL_TLAST,  (?HANGUL_TBASE + ?HANGUL_TCOUNT)).
 59 | % NOTE(review): the checks below accept Ch == *LAST (=<); confirm that bound.
 60 | -define(CHAR_IS_HANGUL_L(Ch), (
 61 |  (Ch>=?HANGUL_LBASE) and (Ch=<?HANGUL_LLAST)
 62 | )).
 63 | 
 64 | -define(CHAR_IS_HANGUL_V(Ch), (
 65 |  (Ch>=?HANGUL_VBASE) and (Ch=<?HANGUL_VLAST)
 66 | )).
 67 | 
 68 | -define(CHAR_IS_HANGUL_T(Ch), (
 69 |  (Ch>=?HANGUL_TBASE) and (Ch=<?HANGUL_TLAST)
 70 | )).
 71 | % True for the ASCII digits $0..$9 (the bounds used to be $1..$0: never true).
 72 | -define(CHAR_IS_DECIMAL(Ch),  (Ch>=$0 andalso Ch=<$9)).
 73 | 
 74 | 
 75 | 
 76 | -define(CHECK_RANGE(X, A, B), (((X) >= (A)) and ((X) =< (B)))).
 77 | -define(CHECK_VALUE(X, A),    (((X) =:= (A)))).
 78 | 
 79 | % CJK_Unified_Ideograph and CJK_Compatibility_Ideographs from 
 80 | % http://www.unicode.org/Public/UNIDATA/Blocks.txt
 81 | %
 82 | % grep "CJK Unified Ideograph" priv/UNIDATA/Blocks.txt 
 83 | % 3400..4DBF; CJK Unified Ideographs Extension A
 84 | % 4E00..9FFF; CJK Unified Ideographs
 85 | % 20000..2A6DF; CJK Unified Ideographs Extension B
 86 | % 2A700..2B73F; CJK Unified Ideographs Extension C
 87 | % 2B740..2B81F; CJK Unified Ideographs Extension D
 88 | -define(CHAR_IS_CJK_UNIFIED_IDEOGRAPH(Ch), (
 89 |        ?CHECK_RANGE(Ch, 16#4E00,  16#9FFF) 
 90 | %   or ?CHECK_RANGE(Ch, 16#3400,  16#4DBF) 
 91 | %   or ?CHECK_RANGE(Ch, 16#20000, 16#2A6DF) 
 92 | %   or ?CHECK_RANGE(Ch, 16#2A700, 16#2B73F) 
 93 | %   or ?CHECK_RANGE(Ch, 16#2B740, 16#2B81F) 
 94 | )).
 95 | 
 96 | % grep "CJK Compatibility Ideograph" priv/UNIDATA/Blocks.txt
 97 | % F900..FAFF; CJK Compatibility Ideographs
 98 | % 2F800..2FA1F; CJK Compatibility Ideographs Supplement
 99 | -define(CHAR_IS_CJK_COMPATIBILITY_IDEOGRAPH(Ch), (
100 |        ?CHECK_RANGE(Ch, 16#F900,  16#FAFF)
101 | %   or ?CHECK_RANGE(Ch, 16#2F800, 16#2FA1F)
102 | )).
103 | 
104 | % Unified_Ideograph from http://unicode.org/Public/UNIDATA/PropList.txt
105 | % grep Unified PropList.txt 
106 | % 3400..4DB5    ; Unified_Ideograph # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5
107 | % 4E00..9FCC    ; Unified_Ideograph # Lo [20941] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCC
108 | % FA0E..FA0F    ; Unified_Ideograph # Lo   [2] CJK COMPATIBILITY IDEOGRAPH-FA0E..CJK COMPATIBILITY IDEOGRAPH-FA0F
109 | % FA11          ; Unified_Ideograph # Lo       CJK COMPATIBILITY IDEOGRAPH-FA11
110 | % FA13..FA14    ; Unified_Ideograph # Lo   [2] CJK COMPATIBILITY IDEOGRAPH-FA13..CJK COMPATIBILITY IDEOGRAPH-FA14
111 | % FA1F          ; Unified_Ideograph # Lo       CJK COMPATIBILITY IDEOGRAPH-FA1F
112 | % FA21          ; Unified_Ideograph # Lo       CJK COMPATIBILITY IDEOGRAPH-FA21
113 | % FA23..FA24    ; Unified_Ideograph # Lo   [2] CJK COMPATIBILITY IDEOGRAPH-FA23..CJK COMPATIBILITY IDEOGRAPH-FA24
114 | % FA27..FA29    ; Unified_Ideograph # Lo   [3] CJK COMPATIBILITY IDEOGRAPH-FA27..CJK COMPATIBILITY IDEOGRAPH-FA29
115 | % 20000..2A6D6  ; Unified_Ideograph # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6
116 | % 2A700..2B734  ; Unified_Ideograph # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734
117 | % 2B740..2B81D  ; Unified_Ideograph # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
118 | -define(CHAR_IS_UNIFIED_IDEOGRAPH(Ch), (
119 |        ?CHECK_RANGE(Ch, 16#3400, 16#4DB5) 
120 |     or ?CHECK_RANGE(Ch, 16#4E00, 16#9FCC) 
121 |     or ?CHECK_RANGE(Ch, 16#FA0E, 16#FA0F)
122 |     or ?CHECK_VALUE(Ch, 16#FA11)
123 |     or ?CHECK_VALUE(Ch, 16#FA13)
124 |     or ?CHECK_VALUE(Ch, 16#FA14)
125 |     or ?CHECK_VALUE(Ch, 16#FA1F)
126 |     or ?CHECK_VALUE(Ch, 16#FA21)
127 |     or ?CHECK_VALUE(Ch, 16#FA23)
128 |     or ?CHECK_VALUE(Ch, 16#FA24)
129 |     or ?CHECK_RANGE(Ch, 16#FA27, 16#FA29)
130 |     or ?CHECK_RANGE(Ch, 16#20000, 16#2A6D6)
131 |     or ?CHECK_RANGE(Ch, 16#2A700, 16#2B734)
132 |     or ?CHECK_RANGE(Ch, 16#2B740, 16#2B81D)
133 | )).
134 | 
135 | % Guard sequence (for use after when): true for the Hangul ranges below.
136 | -define(CHAR_IS_HANGUL(Char), 
137 |     Char>=16#1100, Char=<16#11FF % Hangul Jamo 
138 |   ; Char>=16#A960, Char=<16#A97C % Hangul Jamo Extended-A
139 |   ; Char>=16#D7B0, Char=<16#D7C6 % Hangul Jamo Extended-B
140 |   ; Char>=16#D7CB, Char=<16#D7FB % Hangul Jamo Extended-B
141 |   ; Char>=16#3131, Char=<16#318E % Hangul Compatibility Jamo 
142 |   ; Char==16#302E; Char==16#302F % Tone marks (used in Middle Korean) 
143 |   ; Char>=16#AC00, Char=<16#D7A3 % 11,172 precomposed Hangul syllables
144 |   ; Char>=16#3200, Char=<16#321E % For parenthesised 
145 |   ; Char>=16#3260, Char=<16#327E % and circled 
146 |   ; Char>=16#FFA0, Char=<16#FFDC % For halfwidth 
147 | ).
148 | 


--------------------------------------------------------------------------------
/src/ux_app.erl:
--------------------------------------------------------------------------------
 1 | % vim: set filetype=erlang shiftwidth=4 tabstop=4 expandtab tw=80:
 2 | %% @author Uvarov Michael <arcusfelis@gmail.com>
 3 | %% @copyright 2010 ux Uvarov Michael <arcusfelis@gmail.com>
 4 | 
 5 | %% @doc Callbacks for the ux application.
 6 | %% @private
 7 | 
 8 | -module(ux_app).
 9 | -author("Mochi Media <dev@mochimedia.com>").
10 | 
11 | -behaviour(application).
12 | -export([start/2,stop/1]).
13 | 
14 | 
15 | %% @spec start(_Type, _StartArgs) -> ServerRet
16 | %% @doc application start callback for ux.
17 | start(_Type, _StartArgs) ->
18 |     ux_deps:ensure(),
19 |     ux_sup:start_link().
20 | 
21 | %% @spec stop(_State) -> ServerRet
22 | %% @doc application stop callback for ux.
23 | stop(_State) ->
24 |     ok.
25 | 


--------------------------------------------------------------------------------
/src/ux_char.erl:
--------------------------------------------------------------------------------
  1 | % vim: set filetype=erlang shiftwidth=4 tabstop=4 expandtab tw=80:
  2 | %%%
  3 | %%% @copyright 2010-2011 Michael Uvarov
  4 | %%% @author Michael Uvarov <arcusfelis@gmail.com>
  5 | %%%
  6 | %%% =====================================================================
  7 | %%% This library is free software; you can redistribute it and/or modify
  8 | %%% it under the terms of the GNU Lesser General Public License as
  9 | %%% published by the Free Software Foundation; either version 2 of the
 10 | %%% License, or (at your option) any later version.
 11 | %%%
 12 | %%% This library is distributed in the hope that it will be useful, but
 13 | %%% WITHOUT ANY WARRANTY; without even the implied warranty of
 14 | %%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 15 | %%% Lesser General Public License for more details.
 16 | %%%
 17 | %%% You should have received a copy of the GNU Lesser General Public
 18 | %%% License along with this library; if not, write to the Free Software
 19 | %%% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 20 | %%% USA
 21 | %%%
 22 | %%% $Id$
 23 | %%% =====================================================================
 24 | 
 25 | %%% =====================================================================
 26 | %%%   Copyright 2011 Uvarov Michael 
 27 | %%%
 28 | %%%   Licensed under the Apache License, Version 2.0 (the "License");
 29 | %%%   you may not use this file except in compliance with the License.
 30 | %%%   You may obtain a copy of the License at
 31 | %%%
 32 | %%%       http://www.apache.org/licenses/LICENSE-2.0
 33 | %%%
 34 | %%%   Unless required by applicable law or agreed to in writing, software
 35 | %%%   distributed under the License is distributed on an "AS IS" BASIS,
 36 | %%%   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 37 | %%%   See the License for the specific language governing permissions and
 38 | %%%   limitations under the License.
 39 | %%%
 40 | %%% $Id$
 41 | %%% =====================================================================
 42 | 
 43 | 
 44 | %%% @doc Character functions.
 45 | 
 46 | 
 47 | -module(ux_char).
 48 | -author('Uvarov Michael <arcusfelis@gmail.com>').
 49 | 
 50 | -export([comment/1, type/1, block/1, script/1,
 51 |         to_lower/1, to_upper/1, to_ncr/1,
 52 |         is_lower/1, is_upper/1, 
 53 |         is_letter/1, is_number/1, is_decimal/1, is_mark/1, 
 54 |         is_separator/1, is_punctuation_mark/1, 
 55 |         is_hangul/1, is_acsii/1, 
 56 |         is_cjk_compatibility_ideograph/1, 
 57 |         is_cjk_unified_ideograph/1, 
 58 |         is_unified_ideograph/1, 
 59 |         is_hangul_precomposed/1 
 60 |         ]).
 61 | -include("ux.hrl").
 62 | 
 63 | -type char_type() :: ux_types:char_type().
 64 | 
 65 | %% @doc Delegates to ?UNIDATA:char_to_lower/1.
 66 | -spec to_lower(char()) -> char(); 
 67 |         (skip_check) -> fun().
 68 | 
 69 | to_lower(V) -> 
 70 |     ?UNIDATA:char_to_lower(V).
 71 | 
 72 | %% @doc Delegates to ?UNIDATA:char_to_upper/1.
 73 | -spec to_upper(char()) -> char(); 
 74 |         (skip_check) -> fun().
 75 | 
 76 | to_upper(V) -> 
 77 |     ?UNIDATA:char_to_upper(V).
 78 | 
 79 | 
 80 | -spec is_lower(char()) -> boolean();
 81 |         (skip_check) -> fun().
 82 | 
 83 | is_lower(V) -> 
 84 |     ?UNIDATA:is_lower(V).
 85 | 
 86 | 
 87 | -spec is_upper(char()) -> boolean(); 
 88 |         (skip_check) -> fun().
 89 | 
 90 | is_upper(V) -> 
 91 |     ?UNIDATA:is_upper(V).
 92 | 
 93 | 
 94 | -spec comment(char()) -> binary();
 95 |         (skip_check) -> fun().
 96 | 
 97 | comment(V) -> 
 98 |     ?UNIDATA:char_comment(V).
 99 | 
100 | 
%% @doc Character type of `Char', fetched through `?UNIDATA:char_type/1'.
%%      The `is_*' predicates of this module are built on top of this call.
%%      Per the spec, the atom `skip_check' yields a fun instead of a type.
-spec type(char()) -> char_type();
        (skip_check) -> fun().

type(Char) ->
    ?UNIDATA:char_type(Char).
106 | 
107 | 
%% @doc Returns true when `Char' lies in the range 0..16#7F.
-spec is_acsii(char()) -> boolean().
is_acsii(Char) ->
    Char >= 0 andalso Char =< 16#7F.
111 | 
%% @doc Returns true, if C is a letter, i.e. the name of its type atom
%%      consists of exactly two characters and starts with `L'.
-spec is_letter(C::char()) -> boolean().

is_letter(C) ->
    case erlang:atom_to_list(type(C)) of
        [Major, _Minor] -> Major =:= $L;
        _Other          -> false
    end.
120 | 
%% @doc Returns true, if C is a mark, i.e. the name of its type atom
%%      consists of exactly two characters and starts with `M'.
is_mark(C) ->
    case erlang:atom_to_list(type(C)) of
        [Major, _Minor] -> Major =:= $M;
        _Other          -> false
    end.
126 | 
%% @doc Returns true, if C is a decimal number (its type is the atom 'Nd').
-spec is_decimal(C::char()) -> boolean().

is_decimal(C) ->
    case type(C) of
        'Nd' -> true;
        _    -> false
    end.
131 | 
132 | 
%% @doc Returns true, if C is a number, i.e. the name of its type atom
%%      consists of exactly two characters and starts with `N'.
-spec is_number(C::char()) -> boolean().

is_number(C) ->
    case erlang:atom_to_list(type(C)) of
        [Major, _Minor] -> Major =:= $N;
        _Other          -> false
    end.
141 | 
%% @doc Returns true, if C is a separator, i.e. the name of its type atom
%%      consists of exactly two characters and starts with `Z'.
-spec is_separator(C::char()) -> boolean().

is_separator(C) ->
    case erlang:atom_to_list(type(C)) of
        [Major, _Minor] -> Major =:= $Z;
        _Other          -> false
    end.
150 | 
%% @doc Returns true, if C is a punctuation mark, i.e. the name of its type
%%      atom consists of exactly two characters and starts with `P'.
-spec is_punctuation_mark(C::char()) -> boolean().

is_punctuation_mark(C) ->
    case erlang:atom_to_list(type(C)) of
        [Major, _Minor] -> Major =:= $P;
        _Other          -> false
    end.
159 | 
%% @doc Encodes one character as a decimal numeric character reference.
%%
%%      Characters up to 16#7F are returned unchanged as a one-element
%%      string; every other character is rendered as "&#N;", where N is the
%%      decimal code point.
%%
%%      The argument is a code point (`char()'), not a UTF-8 byte. An earlier
%%      version applied a byte-level "not a start byte" test and returned ""
%%      for U+0080..U+00C2, silently dropping characters such as U+00A9,
%%      while U+00C3 and above were encoded. That clause has been removed.
-spec to_ncr(char()) -> string().
to_ncr(Char) when Char =< 16#7F ->
    [Char]; % passed through as is
to_ncr(Char) ->
    lists:flatten(io_lib:format("&#~p;", [Char])).
167 | 
168 | 
%% http://unicode.org/reports/tr15/#Hangul
%% @doc Returns true when the guard macro ?CHAR_IS_HANGUL holds for `Char'.
is_hangul(Char) ->
    if
        ?CHAR_IS_HANGUL(Char) -> true;
        true                  -> false
    end.
174 | 
%% @doc Returns true when `Char' lies in 16#AC00..16#D7A3, the block of
%%      11,172 precomposed Hangul syllables.
is_hangul_precomposed(Char) ->
    Char >= 16#AC00 andalso Char =< 16#D7A3.
180 | 
%% @doc Returns true when the guard macro
%%      ?CHAR_IS_CJK_COMPATIBILITY_IDEOGRAPH holds for `Ch'.
is_cjk_compatibility_ideograph(Ch) ->
    if
        ?CHAR_IS_CJK_COMPATIBILITY_IDEOGRAPH(Ch) -> true;
        true                                     -> false
    end.
184 | 
%% @doc Returns true when the guard macro ?CHAR_IS_CJK_UNIFIED_IDEOGRAPH
%%      holds for `Ch'.
is_cjk_unified_ideograph(Ch) ->
    if
        ?CHAR_IS_CJK_UNIFIED_IDEOGRAPH(Ch) -> true;
        true                               -> false
    end.
188 | 
%% @doc Returns true when the guard macro ?CHAR_IS_UNIFIED_IDEOGRAPH holds
%%      for `Ch'.
is_unified_ideograph(Ch) ->
    if
        ?CHAR_IS_UNIFIED_IDEOGRAPH(Ch) -> true;
        true                           -> false
    end.
192 | 
%% @doc Block of the character `V', fetched through `?UNIDATA:char_block/1'.
%%      Per the spec, the atom `skip_check' yields a fun instead of an atom.
%%
%%      The spec used to name the atom `char' as the argument type; the
%%      built-in type `char()' is what the sibling specs use.
-spec block(char()) -> atom();
        (skip_check) -> fun().
block(V) -> ?UNIDATA:char_block(V).
196 | 
%% @doc Script of the character `V', fetched through `?UNIDATA:char_script/1'.
%%      Per the spec, the atom `skip_check' yields a fun instead of an atom.
%%
%%      The spec used to name the atom `char' as the argument type; the
%%      built-in type `char()' is what the sibling specs use.
-spec script(char()) -> atom();
        (skip_check) -> fun().
script(V) -> ?UNIDATA:char_script(V).
200 | 


--------------------------------------------------------------------------------
/src/ux_deps.erl:
--------------------------------------------------------------------------------
 1 | %% @author Uvarov Michael <arcusfelis@gmail.com>
 2 | %% @copyright 2010 ux Uvarov Michael <arcusfelis@gmail.com>
 3 | %%
 4 | %% @doc Ensure that the relatively-installed dependencies are on the code
 5 | %%      loading path, and locate resources relative
 6 | %%      to this application's path.
 7 | 
 8 | %% @private
 9 | 
10 | -module(ux_deps).
11 | -author("Uvarov Michael <arcusfelis@gmail.com>").
12 | 
13 | -export([ensure/0, ensure/1]).
14 | -export([get_base_dir/0, get_base_dir/1]).
15 | -export([local_path/1, local_path/2]).
16 | -export([deps_on_path/0, new_siblings/1]).
17 | 
%% @spec deps_on_path() -> [ProjNameAndVers]
%% @doc Ordered set of the project directory names found on the code path.
%%      A code path entry counts when it has the shape
%%      ".../deps/ProjNameAndVers/ebin".
deps_on_path() ->
    Names = [filename:basename(filename:dirname(Path))
             || Path <- code:get_path(),
                filename:basename(Path) =:= "ebin",
                filename:basename(
                  filename:dirname(filename:dirname(Path))) =:= "deps"],
    ordsets:from_list(Names).
32 | 
%% @spec new_siblings(Module) -> [Dir]
%% @doc Find new siblings paths relative to Module that aren't already on the
%%      code path. For every "deps/*/ebin" directory whose project name is
%%      not yet on the path, the existing "ebin" and "include" directories
%%      are returned.
new_siblings(Module) ->
    Known = deps_on_path(),
    EbinDirs = filelib:wildcard(local_path(["deps", "*", "ebin"], Module)),
    IsNew = fun(EbinDir) ->
                    Name = filename:basename(filename:dirname(EbinDir)),
                    not ordsets:is_element(Name, Known)
            end,
    ProjectDirs = [filename:dirname(EbinDir) || EbinDir <- EbinDirs,
                                                IsNew(EbinDir)],
    Candidates = lists:append([[filename:join([Dir, "ebin"]),
                                filename:join([Dir, "include"])]
                               || Dir <- ProjectDirs]),
    [Dir || Dir <- Candidates, filelib:is_dir(Dir)].
47 | 
48 | 
%% @spec ensure(Module) -> ok
%% @doc Adds the directories reported by new_siblings(Module) to the code
%%      path, so that the ebin and include paths of the dependencies of
%%      Module's application become available. Always returns ok.
ensure(Module) ->
    NewPaths = new_siblings(Module),
    code:add_paths(NewPaths),
    ok.
56 | 
%% @spec ensure() -> ok
%% @doc Same as ensure(?MODULE): puts the ebin and include paths of this
%%      application's dependencies on the code path.
ensure() ->
    Self = ?MODULE,
    ensure(Self).
63 | 
%% @spec get_base_dir(Module) -> string()
%% @doc Return the application directory for Module: the directory two
%%      levels above the file Module was loaded from. It assumes Module is
%%      in a standard OTP layout application in the ebin or src directory.
%%      Fails with badmatch unless code:is_loaded/1 answers {file, Path}.
get_base_dir(Module) ->
    {file, LoadedFrom} = code:is_loaded(Module),
    ContainingDir = filename:dirname(LoadedFrom),
    filename:dirname(ContainingDir).
70 | 
%% @spec get_base_dir() -> string()
%% @doc Return the application directory for this application.
%%      Same as get_base_dir(?MODULE).
get_base_dir() ->
    Self = ?MODULE,
    get_base_dir(Self).
76 | 
%% @spec local_path([string()], Module) -> string()
%% @doc Joins Components onto the application directory of Module, giving
%%      an application-relative path.
local_path(Components, Module) ->
    BaseDir = get_base_dir(Module),
    filename:join([BaseDir | Components]).
81 | 
%% @spec local_path(Components) -> string()
%% @doc Return an application-relative directory for this application.
%%      Same as local_path(Components, ?MODULE).
local_path(Components) ->
    Self = ?MODULE,
    local_path(Components, Self).
87 | 


--------------------------------------------------------------------------------
/src/ux_gb.erl:
--------------------------------------------------------------------------------
  1 | %%% @doc Default Grapheme Cluster Boundary Breaker
  2 | %%% 
  3 | %%%      [UTR29: Grapheme Cluster Boundaries]
  4 | %%%      (http://unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries)
  5 | %%% @end
  6 | 
  7 | -module(ux_gb).
  8 | -include("ux.hrl").
  9 | -export([split/2]).
 10 | 
 11 | 
 12 | % Carriage Return
 13 | -define(CR, 16#000D).
 14 | 
 15 | % Line Feed
 16 | -define(LF, 16#000A).
 17 | 
 18 | % Zero width non-joiner
 19 | -define(ZWNJ, 16#200C).
 20 | 
 21 | % Zero width joiner
 22 | -define(ZWJ, 16#200D).
 23 | 
 24 | 
 25 | 
 26 | %% Adds the atom 'x' between non-breaked characters.
 27 | split(T,S) when
 28 |     T=:='legacy';
 29 |     T=:='extended' -> 
 30 |     Acc = [],
 31 | 
 32 |     % extract general classes
 33 |     TypesFn = ux_unidata:break_props('grapheme'),
 34 |     Types = lists:map(TypesFn, S),
 35 | 
 36 |     do_split(T, S, Types, Acc).
 37 | 
 38 | 
%% http://unicode.org/reports/tr29/#Table_Combining_Char_Sequences_and_Grapheme_Clusters
%%
%% do_split(Mode, Chars, Classes, Acc)
%%
%% Walks Chars and Classes (the break class of every character) in lock
%% step. Acc collects the output in reverse order. A clause that pushes
%% ['x',SH|Acc] states "no break between SH and the next character"; a clause
%% that pushes only [SH|Acc] leaves a break after SH. The clause order
%% matters: earlier clauses take precedence over later ones.

% GB1
% sot -
% (no clause needed: nothing is emitted before the first character)

% GB2
% - eot
% (handled by the two terminating clauses at the bottom)

% GB3
% CR x LF
do_split(T, [_CR,_LF|ST], 
            ['CR','LF'|TT], Acc) ->
    % Both characters are consumed at once, so the LF is never re-examined.
    NewAcc = [?LF,'x',?CR|Acc],
    do_split(T, ST, TT, NewAcc);

% GB5
% - ( Control | CR | LF )
% The class that is tested here is the one of the NEXT character.
do_split(T, [SH|ST], 
            [_|TT = [TH|_]], Acc) 
    when TH=:='Control'
       ; TH=:='CR'
       ; TH=:='LF' ->
    NewAcc = [SH|Acc],
    do_split(T, ST, TT, NewAcc);

% GB4 / GB5 with a pending 'x'
% The current character is a control; an 'x' on top of Acc is dropped so
% that a break precedes it.
do_split(T, [SH|ST], 
            [TH|TT], ['x'|Acc]) 
    when TH=:='Control'
       ; TH=:='CR'
       ; TH=:='LF' ->
    NewAcc = [SH|Acc],
    do_split(T, ST, TT, NewAcc);

% GB4
% ( Control | CR | LF ) -
% The current character is a control: it is pushed without 'x'.
do_split(T, [SH|ST], 
            [TH|TT], Acc) 
    when TH=:='Control'
       ; TH=:='CR'
       ; TH=:='LF' ->
    NewAcc = [SH|Acc],
    do_split(T, ST, TT, NewAcc);
    
% GB6
% L x ( L | V | LV | LVT )
do_split(T, [SH|ST], 
            ['L'|TT = [TH2|_]], Acc) 
    when TH2=:='L'
       ; TH2=:='V'
       ; TH2=:='LV'
       ; TH2=:='LVT' ->
    NewAcc = ['x',SH|Acc],
    do_split(T, ST, TT, NewAcc);

% GB7
% ( LV | V ) x ( V | T )
do_split(T, [SH|ST], 
            [TH1|TT = [TH2|_]], Acc) 
    when (TH2=:='V'  orelse TH2=:='T')
     and (TH1=:='LV' orelse TH1=:='V') ->
    NewAcc = ['x',SH|Acc],
    do_split(T, ST, TT, NewAcc);

% GB8
% ( LVT | T) x T
do_split(T, [SH|ST], 
            [TH1|TT = ['T'|_]], Acc) 
    when TH1=:='LVT'
       ; TH1=:='T' ->
    NewAcc = ['x',SH|Acc],
    do_split(T, ST, TT, NewAcc);

 
% GB 9
% x Extend
do_split(T, [SH|ST], 
            [_|TT = ['Extend'|_]], Acc) ->
    NewAcc = ['x',SH|Acc],
    do_split(T, ST, TT, NewAcc);

% GB 9a
% x SpacingMark
% Applies to 'extended' mode only.
do_split('extended'=T, 
            [SH|ST], 
            [_|TT = ['SpacingMark'|_]], Acc)
     ->
    NewAcc = ['x',SH|Acc],
    do_split(T, ST, TT, NewAcc);

% GB 9b
% Prepend x
% Applies to 'extended' mode only, and only when a character follows.
do_split('extended'=T, 
            [SH|ST], 
            ['Prepend'|TT = [_|_]], Acc) ->
    NewAcc = ['x',SH|Acc],
    do_split(T, ST, TT, NewAcc);

% GB 10
% Any - Any
do_split(T, 
            [SH|ST], 
            [_|TT], Acc) ->
    NewAcc = [SH|Acc],
    do_split(T, ST, TT, NewAcc);

% End of input: a trailing 'x' is dropped, then the result is put in order.
do_split(_T, [], [], ['x'|Acc]) ->
    lists:reverse(Acc);

do_split(_T, [], [], Acc) ->
    lists:reverse(Acc).
148 | 
149 |     
150 | 


--------------------------------------------------------------------------------
/src/ux_sup.erl:
--------------------------------------------------------------------------------
 1 | %% @doc Supervisor for the ux application.
 2 | %% @private
 3 | 
 4 | -module(ux_sup).
 5 | 
 6 | -behaviour(supervisor).
 7 | 
 8 | %% External exports
 9 | -export([start_link/0]).
10 | 
11 | %% supervisor callbacks
12 | -export([init/1]).
13 | 
%% @spec start_link() -> ServerRet
%% @doc API for starting the supervisor. It is registered locally under the
%%      name of this module and uses this module as its callback module.
start_link() ->
    RegName = {local, ?MODULE},
    supervisor:start_link(RegName, ?MODULE, []).
18 | 
%% @spec init([]) -> SupervisorTree
%% @doc supervisor callback. Declares four permanent children under a
%%      one_for_one strategy (at most 10 restarts within 10 seconds).
init([]) ->
    RestartStrategy = {one_for_one, 10, 10},
    ChildSpecs = [
        % Control unidata stores.
        {ux_unidata_store_sup,
            {ux_unidata_store_sup, start_link, []},
            permanent, infinity, supervisor, [ux_unidata_store_sup]},

        % Provide information about unidata stores.
        {ux_unidata_filelist,
            {ux_unidata_filelist, start_link, []},
            permanent, 2000, worker, [ux_unidata_filelist]},

        % Provide a global dictionary for this node.
        {ux_unidata_server,
            {ux_unidata_server, start_link, []},
            permanent, 10000, worker, [ux_unidata_server]},

        % Can add functions to a module on the fly
        {ux_unidata_meta,
            {metamodule, start_link, [ux_unidata_meta]},
            permanent, 10000, worker, [metamodule]}
    ],
    {ok, {RestartStrategy, ChildSpecs}}.
44 | 
45 | 


--------------------------------------------------------------------------------
/src/ux_types.erl:
--------------------------------------------------------------------------------
 1 | -module(ux_types).
 2 | 
%% Two-letter category code of a character, plus the catch-all `other'.
%% NOTE(review): the atoms below are spelled in lower case, whereas ux_char
%% compares the result of type/1 with 'Nd' and matches names that start with
%% an upper-case letter ($L, $M, $N, $Z, $P) -- confirm which spelling the
%% parser really produces.
-type char_type() ::     
% Normative Categories:
      lu % Letter, Uppercase
    | ll % Letter, Lowercase
    | lt % Letter, Titlecase
    | mn % Mark, Non-Spacing
    | mc % Mark, Spacing Combining
    | me % Mark, Enclosing
    | nd % Number, Decimal Digit
    | nl % Number, Letter
    | no % Number, Other
    | zs % Separator, Space
    | zl % Separator, Line
    | zp % Separator, Paragraph
    | cc % Other, Control
    | cf % Other, Format
    | cs % Other, Surrogate
    | co % Other, Private Use
    | cn % Other, Not Assigned (no characters in the file have this property)
% Informative Categories:
    | lm % Letter, Modifier
    | lo % Letter, Other
    | pc % Punctuation, Connector
    | pd % Punctuation, Dash
    | ps % Punctuation, Open
    | pe % Punctuation, Close
    | pi % Punctuation, Initial quote (may behave like Ps or Pe depending on
         % usage)
    | pf % Punctuation, Final quote (may behave like Ps or Pe depending on usage)
    | po % Punctuation, Other
    | sm % Symbol, Math
    | sc % Symbol, Currency
    | sk % Symbol, Modifier
    | so % Symbol, Other
    | other
.
39 | 
40 | -type ux_ccc() :: 0..240.
41 | 
42 | -export_type([char_type/0, 
43 |         ux_ccc/0]).
44 | 


--------------------------------------------------------------------------------
/src/ux_unidata.erl:
--------------------------------------------------------------------------------
  1 | % vim: set filetype=erlang shiftwidth=4 tabstop=4 expandtab tw=80:
  2 | %%% =====================================================================
  3 | %%% This library is free software; you can redistribute it and/or modify
  4 | %%% it under the terms of the GNU Lesser General Public License as
  5 | %%% published by the Free Software Foundation; either version 2 of the
  6 | %%% License, or (at your option) any later version.
  7 | %%%
  8 | %%% This library is distributed in the hope that it will be useful, but
  9 | %%% WITHOUT ANY WARRANTY; without even the implied warranty of
 10 | %%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 11 | %%% Lesser General Public License for more details.
 12 | %%%
 13 | %%% You should have received a copy of the GNU Lesser General Public
 14 | %%% License along with this library; if not, write to the Free Software
 15 | %%% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 16 | %%% USA
 17 | %%%
 18 | %%% $Id$
 19 | %%%
 20 | %%% @copyright 2010-2011 Michael Uvarov
 21 | %%% @author Michael Uvarov <arcusfelis@gmail.com>
 22 | %%% =====================================================================
 23 | 
 24 | %%% @doc Functions for extraction UNIDATA.
 25 | %%%      UNIDATA is a part of The Unicode Character Database (UCD).
 26 | %%%      For character properties, casing behavior, default line-, word-,
 27 | %%%      cluster-breaking behavior, etc.
 28 | %%%      http://unicode.org/ucd/
 29 | %%%
 30 | %%%      This file calls ux_unidata_filelist.
 31 | %%%      ux_unidata_filelist contains the list of available files.
 32 | %%%      ux_unidata_filelist returns the anonymous function.
 33 | %%%      Fun extracts information from ETS table.
 34 | %%%      The ETS tables were generated by ux_unidata_store. 
 35 | %%%      ux_unidata_store is the owner of the list of ETS tables.
 36 | %%%      ux_unidata_store runs ux_unidata_parser, which runs one of
 37 | %%%      ux_unidata_parser_*.
 38 | %%%      ux_unidata_parser reads file and put information to the ETS table.
 39 | %%%
 40 | %%%      Each file with UNIDATA is parsed to the list of the ETS tables.
 41 | %%%      Fun can read from ETS table.
 42 | %%%      If ETS table will be deleted, then fun will be reloaded.
 43 | %%% @end
 44 | %%% @private
 45 | 
 46 | 
 47 | -module(ux_unidata).
 48 | -author('Uvarov Michael <arcusfelis@gmail.com>').
 49 | -export([get_source_file/1, get_test_file/1, open_test_file/1]).
 50 | -export([char_to_upper/1, char_to_lower/1, is_upper/1, is_lower/1,
 51 |         char_comment/1, char_type/1, ccc/1, 
 52 |         nfc_qc/1, nfd_qc/1, nfkc_qc/1, nfkd_qc/1, 
 53 |         is_comp_excl/1, is_compat/1, decomp/1, comp/2, comp/1,
 54 |         ducet/1, char_block/1, char_script/1,
 55 | 
 56 |         break_props/1, tertiary_weight/1]).
 57 | 
 58 | -include("ux.hrl").
 59 | 
 60 | -type ux_ccc() :: ux_types:ux_ccc().
 61 | 
 62 | 
 63 | 
 64 | 
 65 | priv_dir() ->
 66 |     case code:priv_dir(ux) of
 67 |         [_|_] = Res -> Res;
 68 |         _ -> "../priv"
 69 |     end.
 70 | 
 71 | 
 72 | %% Return path to directory with testing data files.
 73 | test_dir() ->
 74 |     case code:lib_dir(ux, testing) of
 75 |         [_|_] = Res -> Res;
 76 |         _ -> "../testing"
 77 |     end.
 78 | 
 79 | 
 80 | get_dir('ucd') -> priv_dir() ++ "/"  ?UNIDATA_VERSION  "/";
 81 | get_dir('uca') -> priv_dir() ++ "/"  ?UCADATA_VERSION  "/".
 82 | 
 83 | 
 84 | get_test_dir('ucd') -> test_dir() ++ "/"  ?UNIDATA_VERSION  "/";
 85 | get_test_dir('uca') -> test_dir() ++ "/"  ?UCADATA_VERSION  "/".
 86 | 
 87 | 
 88 | -spec get_source_file(Parser::atom()) -> string().
 89 | get_source_file('allkeys') ->
 90 |     get_dir('uca') ++ "/allkeys.txt.gz";
 91 | get_source_file('blocks') ->
 92 |     get_dir('ucd') ++ "/Blocks.txt";
 93 | get_source_file('scripts') ->
 94 |     get_dir('ucd') ++ "/Scripts.txt";
 95 | get_source_file('comp_exclusions') ->
 96 |     get_dir('ucd') ++ "/CompositionExclusions.txt";
 97 | get_source_file('norm_props') ->
 98 |     get_dir('ucd') ++ "/DerivedNormalizationProps.txt.gz";
 99 | get_source_file('unidata') ->
100 |     get_dir('ucd') ++ "/UnicodeData.txt.gz";
101 | get_source_file('grapheme_break_property') ->
102 |     get_dir('ucd') ++ "/auxiliary/GraphemeBreakProperty.txt.gz";
103 | get_source_file('word_break_property') ->
104 |     get_dir('ucd') ++ "/auxiliary/WordBreakProperty.txt.gz".
105 | 
106 | 
107 | 
108 | 
%% Path of the test data file with the given identifier. The normalization
%% and collation tests live under get_test_dir/1, the break tests are
%% shipped next to the source data under get_dir('ucd').
%% An unknown identifier fails with function_clause.
get_test_file('normalization_test') ->
    Dir = get_test_dir('ucd'),
    Dir ++ "NormalizationTest.txt.gz";

get_test_file('collation_test_shifted') ->
    Dir = get_test_dir('uca'),
    % The full file "CollationTest_SHIFTED.txt" is slow and has comments;
    % the short version is used instead.
    Dir ++ "CollationTest/" "CollationTest_SHIFTED_SHORT.txt.gz";

get_test_file('collation_test_non_ignorable') ->
    Dir = get_test_dir('uca'),
    % Fast version of "CollationTest_NON_IGNORABLE.txt" (data from the slow
    % version are equal).
    Dir ++ "CollationTest/" "CollationTest_NON_IGNORABLE_SHORT.txt.gz";

get_test_file('grapheme_break_test') ->
    Dir = get_dir('ucd'),
    Dir ++ "/auxiliary/GraphemeBreakTest.txt.gz";
get_test_file('word_break_test') ->
    Dir = get_dir('ucd'),
    Dir ++ "/auxiliary/WordBreakTest.txt.gz".
136 | 
137 | 
%% Resolve the test file for Id with get_test_file/1 and hand its name to
%% ux_unidata_parser:open_file/1.
open_test_file(Id) ->
    ux_unidata_parser:open_file(get_test_file(Id)).
141 | 
142 | 
143 | 
%% Look up `to_lower' in the `unidata' source for Char.
%% Per the spec, the atom `skip_check' yields the lookup fun itself.
-spec char_to_lower(char()) -> char(); 
        (skip_check) -> fun().

char_to_lower(Char) ->
    func(unidata, to_lower, Char).
149 | 
150 | 
%% Look up `to_upper' in the `unidata' source for Char.
%% Per the spec, the atom `skip_check' yields the lookup fun itself.
-spec char_to_upper(char()) -> char(); 
        (skip_check) -> fun().

char_to_upper(Char) ->
    func(unidata, to_upper, Char).
156 | 
157 | 
%% Look up `is_lower' in the `unidata' source for Char.
%% Per the spec, the atom `skip_check' yields the lookup fun itself.
-spec is_lower(char()) -> boolean(); 
        (skip_check) -> fun().

is_lower(Char) ->
    func(unidata, is_lower, Char).
163 | 
164 | 
%% Look up `is_upper' in the `unidata' source for Char.
%% Per the spec, the atom `skip_check' yields the lookup fun itself.
-spec is_upper(char()) -> boolean(); 
        (skip_check) -> fun().

is_upper(Char) ->
    func(unidata, is_upper, Char).
170 | 
171 | 
%% Look up `type' in the `unidata' source for Char.
%% Per the spec, the atom `skip_check' yields the lookup fun itself.
-spec char_type(C::char()) -> atom();
        (skip_check) -> fun().

char_type(Char) ->
    func(unidata, type, Char).
177 | 
178 | 
%% Look up `comment' in the `unidata' source for Char.
%% Per the spec, the atom `skip_check' yields the lookup fun itself.
-spec char_comment(C::char()) -> binary();
        (skip_check) -> fun().

char_comment(Char) ->
    func(unidata, comment, Char).
184 | 
185 | 
%% Look up `ccc' in the `unidata' source for Char.
%% Per the spec, the atom `skip_check' yields the lookup fun itself.
-spec ccc(C::char()) -> ux_ccc();
        (skip_check) -> fun().

ccc(Char) ->
    func(unidata, ccc, Char).
191 | 
192 | 
193 | 
%% Look up `nfc_qc' in the `norm_props' source for Char; the spec lists
%% y, n and m as possible answers.
%% Per the spec, the atom `skip_check' yields the lookup fun itself.
-spec nfc_qc(C::char()) -> y | n | m;
        (skip_check) -> fun().

nfc_qc(Char) ->
    func(norm_props, nfc_qc, Char).
199 | 
200 | 
%% Look up `nfd_qc' in the `norm_props' source for Char; the spec lists
%% y, n and m as possible answers.
%% Per the spec, the atom `skip_check' yields the lookup fun itself.
-spec nfd_qc(C::char()) -> y | n | m;
        (skip_check) -> fun().

nfd_qc(Char) ->
    func(norm_props, nfd_qc, Char).
206 | 
207 | 
%% Look up `nfkc_qc' in the `norm_props' source for Char; the spec lists
%% y, n and m as possible answers.
%% Per the spec, the atom `skip_check' yields the lookup fun itself.
-spec nfkc_qc(C::char()) -> y | n | m;
        (skip_check) -> fun().

nfkc_qc(Char) ->
    func(norm_props, nfkc_qc, Char).
213 | 
214 | 
%% Look up `nfkd_qc' in the `norm_props' source for Char; the spec lists
%% y, n and m as possible answers.
%% Per the spec, the atom `skip_check' yields the lookup fun itself.
-spec nfkd_qc(C::char()) -> y | n | m;
        (skip_check) -> fun().

nfkd_qc(Char) ->
    func(norm_props, nfkd_qc, Char).
220 | 
221 | 
%% Look up `is_compat' in the `unidata' source for Char.
%% Per the spec, the atom `skip_check' yields the lookup fun itself.
-spec is_compat(C::char()) -> boolean();
        (skip_check) -> fun().

is_compat(Char) ->
    func(unidata, is_compat, Char).
227 | 
228 | 
229 | 
%% Look up `is_exclusion' in the `comp_exclusions' source for Char.
%% Per the spec, the atom `skip_check' yields the lookup fun itself.
-spec is_comp_excl(C::char()) -> boolean();
        (skip_check) -> fun().

is_comp_excl(Char) ->
    func(comp_exclusions, is_exclusion, Char).
235 | 
236 | 
%% Look up `ducet' in the `allkeys' source for the list Key.
%% Per the spec, the atom `skip_check' yields the lookup fun itself.
-spec ducet(list()) -> list() | atom();
        (skip_check) -> fun().

ducet(Key) ->
    func(allkeys, ducet, Key).
241 | 
242 | 
%% Look up `comp' in the `unidata' source for the pair {First, Second};
%% per the spec the answer is a character or false.
-spec comp(char(), char()) -> char() | false.

comp(First, Second) ->
    Pair = {First, Second},
    func(unidata, comp, Pair).
247 | 
%% Build a two-argument fun around the `comp' lookup fun of the `unidata'
%% source: the lookup fun takes a pair, the returned fun takes the two
%% characters separately. Only the atom 'skip_check' is accepted.
comp('skip_check') ->
    Lookup = func(unidata, comp, 'skip_check'),
    fun(First, Second) -> Lookup({First, Second}) end.
253 |     
254 | 
255 | 
%% Look up `decomp' in the `unidata' source for Char.
%% Per the spec, the atom `skip_check' yields the lookup fun itself.
-spec decomp(char()) -> list();
        (skip_check) -> fun().

decomp(Char) ->
    func(unidata, decomp, Char).
261 | 
262 | 
%% Look up `block' in the `blocks' source for Char.
%% Per the spec, the atom `skip_check' yields the lookup fun itself.
-spec char_block(C::char()) -> atom();
        (skip_check) -> fun().

char_block(Char) ->
    func(blocks, block, Char).
268 | 
269 | 
%% Look up `script' in the `scripts' source for Char.
%% Per the spec, the atom `skip_check' yields the lookup fun itself.
-spec char_script(C::char()) -> atom();
        (skip_check) -> fun().

char_script(Char) ->
    func(scripts, script, Char).
275 | 
276 | 
%% Return the lookup fun for a break property table. 'grapheme' selects
%% 'grapheme_break_property' and 'word' selects 'word_break_property'; the
%% same atom is used as parser name and as type.
-spec break_props(atom()) -> fun().
break_props('grapheme') ->
    func('grapheme_break_property', 'grapheme_break_property', 'skip_check');

break_props('word') ->
    func('word_break_property', 'word_break_property', 'skip_check').
285 |     
286 | 
287 | 
%% Ask ux_unidata_filelist for the fun registered for {Parser, Type} and
%% apply it to Value. Every lookup of this module goes through here.
func(Parser, Type, Value) -> 
    Lookup = ux_unidata_filelist:get_source(Parser, Type),
    Lookup(Value).
291 | 
292 | 
% Case or Kana Subtype
%
% Returns the subtype that tertiary_weight/1 pairs with the decomposition
% type of C:
%   - one of the four fixed ranges below yields its kana / hangul subtype;
%   - otherwise the `w3' entry of the `unidata' source is used;
%   - when that entry is false, `upper' is returned for upper-case
%     characters and false for everything else.
%
% Two defects were fixed here: the range guards had their bounds swapped
% (e.g. `16#FF67 >= C, C >= 16#FF6F'), so none of them could ever match, and
% the last branch returned the atom `type' instead of the looked-up value.
w3(C) when C >= 16#FF67, C =< 16#FF6F -> small_narrow_katakana;
w3(C) when C >= 16#FF71, C =< 16#FF9D -> narrow_katakana;
w3(C) when C >= 16#FFA0, C =< 16#FFDF -> narrow_hangul;
w3(C) when C >= 16#32D0, C =< 16#32FE -> circled_katakana;
w3(C) -> 
    case func(unidata, w3, C) of 
        false ->
            case is_upper(C) of
                true -> upper;
                false -> false
            end;
        Type ->
            Type
    end.
306 | 
307 | 
% Decomposition Type
% Look up `comp_tag' in the `unidata' source for Char. tertiary_weight/1
% matches the result against atoms such as wide, compat, font and false.
comp_tag(Char) ->
    func(unidata, comp_tag, Char).
310 | 
311 | 
% http://unicode.org/reports/tr10/#Tertiary_Weight_Table
%
% Returns the tertiary weight of the character C. The weight is selected by
% the pair {decomposition type, case or kana subtype} as computed by
% comp_tag/1 and w3/1; any pair that is not listed gets 16#1F.
%
% The `noBreak' row used to answer 16#1D, which duplicated the weight of the
% upper-case square/super/sub rows; the table referenced above lists 16#1B
% for <noBreak>, which also fills the gap between 16#1A and 16#1C.
tertiary_weight(C) ->
    Type = comp_tag(C),
    SubType = w3(C),

    case {Type, SubType} of
        {false,     false}                  -> 16#02;
        {wide,      false}                  -> 16#03;
        {compat,    false}                  -> 16#04;
        {font,      false}                  -> 16#05;
        {circle,    false}                  -> 16#06;
                                      
        {false,     upper}                  -> 16#08;
        {wide,      upper}                  -> 16#09;
        {compat,    upper}                  -> 16#0A;
        {font,      upper}                  -> 16#0B;
        {circle,    upper}                  -> 16#0C;

        {small,     small_hiragana}         -> 16#0D;
        {false,     normal_hiragana}        -> 16#0E;
        {small,     small_katakana}         -> 16#0F;
        {narrow,    small_narrow_katakana}  -> 16#10;
        {false,     normal_katakana}        -> 16#11;
        {narrow,    narrow_katakana}        -> 16#12;
        {narrow,    narrow_hangul}          -> 16#12;
        {circle,    circled_katakana}       -> 16#13;
        {super,     false}                  -> 16#14;
        {sub,       false}                  -> 16#15;
        {vertical,  false}                  -> 16#16;
        {initial,   false}                  -> 16#17;
        {medial,    false}                  -> 16#18;
        {final,     false}                  -> 16#19;
        {isolated,  false}                  -> 16#1A;
        {noBreak,   false}                  -> 16#1B;
        {square,    false}                  -> 16#1C;
        {square,    upper}                  -> 16#1D;
        {super,     upper}                  -> 16#1D;
        {sub,       upper}                  -> 16#1D;
        {fraction,  false}                  -> 16#1E;
        % Maximum value: everything that is not covered above.
        {_,         _}                      -> 16#1F
    end.
353 | 


--------------------------------------------------------------------------------
/src/ux_wb.erl:
--------------------------------------------------------------------------------
%%% @doc Default Word Boundary Breaker
%%% 
%%%      [UAX29: Word Boundaries]
%%%      (http://unicode.org/reports/tr29/#Word_Boundaries)
%%% @end
  6 | 
  7 | -module(ux_wb).
  8 | -include("ux.hrl").
  9 | -export([split/1, words/1]).
 10 | 
 11 | 
 12 | % Carriage Return
 13 | -define(CR, 16#000D).
 14 | 
 15 | % Line Feed
 16 | -define(LF, 16#000A).
 17 | 
 18 | % Zero width non-joiner
 19 | -define(ZWNJ, 16#200C).
 20 | 
 21 | % Zero width joiner
 22 | -define(ZWJ, 16#200D).
 23 | 
 24 | 
 25 | 
 26 | %% Adds the atom 'x' between non-breaked characters.
 27 | split(S) -> 
 28 |     Acc = [],
 29 | 
 30 |     % extract general classes
 31 |     TypesFn = ux_unidata:break_props('word'),
 32 |     Types = lists:map(TypesFn, S),
 33 |     {ColS, ColTypes} = collapse(S, Types),
 34 | 
 35 |     LastType = '',
 36 |     Res = do_split(LastType, ColS, ColTypes, Acc),
 37 | 
 38 |     {Types, expand(Res)}.
 39 | 
 40 |     
 41 | words(S) ->
 42 |     {Types, Splitted} = ux_wb:split(S),
 43 |     Mod = false,
 44 |     Word = [],
 45 |     Acc = [],
 46 |     do_words(Mod, Splitted, Types, Word, Acc).
 47 |     
 48 | 
 49 | 
 50 | %% Extract words.
 51 | 
 52 | % Extract word.
 53 | do_words(true, ['-'|ST], TT, Word=[_|_], Acc) ->
 54 |     RWord = lists:reverse(Word),
 55 |     NewAcc = [RWord|Acc],
 56 | 
 57 |     Mod = false,
 58 |     NewWord = [],
 59 |     do_words(Mod, ST, TT, NewWord, NewAcc);
 60 | 
 61 | % Skip extracting. Not word.
 62 | do_words(_Mod, ['-'|ST], TT, _Word, Acc) ->
 63 |     Mod = false,
 64 |     NewWord = [],
 65 |     do_words(Mod, ST, TT, NewWord, Acc);
 66 |     
 67 | % Word.
 68 | do_words(_Mod, [SH|ST], [TH|TT], Word, Acc) 
 69 |     when TH=:='ALetter' ->
 70 |     Mod = true,
 71 |     NewWord = [SH|Word],
 72 |     do_words(Mod, ST, TT, NewWord, Acc);
 73 |     
 74 | % Maybe word.
 75 | do_words(Mod, [SH|ST], [TH|TT], Word, Acc) ->
 76 |     NewWord = [SH|Word],
 77 |     do_words(Mod, ST, TT, NewWord, Acc);
 78 | 
 79 | % End of string.
 80 | do_words(true, [], [], [_|_] = Word, Acc) ->
 81 |     RWord = lists:reverse(Word),
 82 |     NewAcc = [RWord|Acc],
 83 |     lists:reverse(NewAcc);
 84 | 
 85 | do_words(false, [], [], _Word, Acc) ->
 86 |     lists:reverse(Acc).
 87 |     
 88 |     
 89 |     
 90 |     
 91 |     
 92 | 
 93 | 
 94 | % WB4
 95 | collapse(S, T) ->
 96 |     SR = [],
 97 |     TR = [],
 98 |     do_collapse(S, T, SR, TR).
 99 |     
100 | 
% A character that is followed by 'Extend' or 'Format' characters is merged
% with them into one element: a list of the grouped characters in reverse
% order, carrying the class of the first character. Characters of class
% 'Newline', 'CR' or 'LF' never start such a group.
do_collapse([Base,Next|Chars], [BaseClass,NextClass|Classes],
            CharAcc, ClassAcc)
    when (NextClass =:= 'Extend' orelse NextClass =:= 'Format'),
         BaseClass =/= 'Newline',
         BaseClass =/= 'CR',
         BaseClass =/= 'LF' ->
    {RestChars, RestClasses, Group} =
        do_collapse2(Chars, Classes, [Next,Base]),
    do_collapse(RestChars, RestClasses, [Group|CharAcc], [BaseClass|ClassAcc]);

% Nothing to merge: copy the character and its class.
do_collapse([Char|Chars], [Class|Classes], CharAcc, ClassAcc) ->
    do_collapse(Chars, Classes, [Char|CharAcc], [Class|ClassAcc]);

% Both inputs are exhausted: put the accumulators into input order.
do_collapse([], [], CharAcc, ClassAcc) ->
    {lists:reverse(CharAcc), lists:reverse(ClassAcc)}.
115 | 
116 | 
% Consume the leading run of 'Extend' / 'Format' characters and push them
% onto Group. Returns {RemainingChars, RemainingClasses, Group}.
do_collapse2([Char|Chars], ['Extend'|Classes], Group) ->
    do_collapse2(Chars, Classes, [Char|Group]);
do_collapse2([Char|Chars], ['Format'|Classes], Group) ->
    do_collapse2(Chars, Classes, [Char|Group]);
do_collapse2(Chars, Classes, Group) ->
    {Chars, Classes, Group}.
123 | 
124 | 
125 | 
% Undo the collapsing step: flatten the grouped elements of Items again.
% The elements are pushed onto an accumulator, so the order of Items is
% reversed in the result.
expand(Items) ->
    do_expand(Items, []).
129 | 
% Push every element of Items onto Acc0. A non-empty list (a group made by
% the collapsing step) is pushed element by element through do_expand2/2,
% anything else is pushed as is.
do_expand(Items, Acc0) ->
    Push = fun([_|_] = Group, Acc) -> do_expand2(Group, Acc);
              (Item, Acc)          -> [Item|Acc]
           end,
    lists:foldl(Push, Acc0, Items).
140 | 
% Push the elements of Group onto Acc one by one, i.e. prepend Group to Acc
% in reverse order.
do_expand2(Group, Acc) ->
    lists:reverse(Group, Acc).
145 | 
146 | 
147 | % do_split(LastType, Chars, Types, Acc): walks Chars and their word-break
148 | % classes (Types) in step, pushing each char onto Acc and, where the next
149 | % char starts a new word, a '-' mark after it. Acc is built in reverse.
150 | % LastType is the class of the previously consumed char (read by WB7/WB11).
151 | % WB3: do not break inside a CR LF pair; both chars are consumed at once.
152 | do_split(_LT, [_CR,_LF|ST], 
153 |               ['CR','LF'|TT], Acc) ->
154 |     NewAcc = [?LF,?CR|Acc], % NOTE(review): no '-' pushed after LF here - confirm vs WB3a
155 |     do_split('LF', ST, TT, NewAcc);
156 | 
157 | % WB3a: break after Newline, CR or LF (when at least one more char follows).
158 | do_split(_LT, [SH|ST], 
159 |               [TH1|TT = [_|_]], Acc) 
160 |     when TH1=:='Newline'
161 |        ; TH1=:='CR'
162 |        ; TH1=:='LF' ->
163 |     NewAcc = ['-',SH|Acc],
164 |     do_split(TH1, ST, TT, NewAcc);
165 | 
166 | % WB3b: break before Newline, CR or LF.
167 | do_split(_LT, [SH|ST], 
168 |               [TH1|TT=[TH2|_]], Acc) 
169 |     when TH2=:='Newline'
170 |        ; TH2=:='CR'
171 |        ; TH2=:='LF' ->
172 |     NewAcc = ['-',SH|Acc],
173 |     do_split(TH1, ST, TT, NewAcc);
174 |     
175 | 
176 | 
177 | % The rules from WB5 to WB13b keep the current char and the next one
178 | % together: SH is pushed without a '-' mark.
179 | % WB5: ALetter x ALetter.
180 | do_split(_LT, [SH|ST], 
181 |               ['ALetter'|TT = ['ALetter'|_]], Acc) ->
182 |     NewAcc = [SH|Acc],
183 |     do_split('ALetter', ST, TT, NewAcc);
184 | 
185 | % WB6: ALetter x (MidLetter | MidNumLet) ALetter.
186 | do_split(_LT, [SH|ST], 
187 |               ['ALetter'|TT = [TH2,'ALetter'|_]], Acc) 
188 |     when TH2=:='MidLetter'
189 |        ; TH2=:='MidNumLet' ->
190 |     NewAcc = [SH|Acc],
191 |     do_split('ALetter', ST, TT, NewAcc);
192 | 
193 |  
194 | % WB7: ALetter (MidLetter | MidNumLet) x ALetter; needs LastType = 'ALetter'.
195 | do_split('ALetter', [SH|ST], 
196 |               [TH1|TT = ['ALetter'|_]], Acc) 
197 |     when TH1=:='MidLetter'
198 |        ; TH1=:='MidNumLet' ->
199 |     NewAcc = [SH|Acc],
200 |     do_split(TH1, ST, TT, NewAcc);
201 |  
202 | % WB8: Numeric x Numeric.
203 | do_split(_LT, [SH|ST], 
204 |               ['Numeric'|TT = ['Numeric'|_]], Acc) ->
205 |     NewAcc = [SH|Acc],
206 |     do_split('Numeric', ST, TT, NewAcc);
207 | 
208 | % WB9: ALetter x Numeric.
209 | do_split(_LT, [SH|ST], 
210 |               ['ALetter'|TT = ['Numeric'|_]], Acc) ->
211 |     NewAcc = [SH|Acc],
212 |     do_split('ALetter', ST, TT, NewAcc);
213 | 
214 | % WB10: Numeric x ALetter.
215 | do_split(_LT, [SH|ST], 
216 |               ['Numeric'|TT = ['ALetter'|_]], Acc) ->
217 |     NewAcc = [SH|Acc],
218 |     do_split('Numeric', ST, TT, NewAcc);
219 | 
220 | 
221 |  
222 | % WB11: Numeric (MidNum | MidNumLet) x Numeric; needs LastType = 'Numeric'.
223 | do_split('Numeric', [SH|ST], 
224 |               [TH1|TT = ['Numeric'|_]], Acc) 
225 |     when TH1=:='MidNum'
226 |        ; TH1=:='MidNumLet' ->
227 |     NewAcc = [SH|Acc],
228 |     do_split(TH1, ST, TT, NewAcc);
229 | 
230 | % WB12: Numeric x (MidNum | MidNumLet) Numeric.
231 | do_split(_LT, [SH|ST], 
232 |               ['Numeric'|TT = [TH2,'Numeric'|_]], Acc) 
233 |     when TH2=:='MidNum'
234 |        ; TH2=:='MidNumLet' ->
235 |     NewAcc = [SH|Acc],
236 |     do_split('Numeric', ST, TT, NewAcc);
237 | 
238 | 
239 | % WB13: Katakana x Katakana.
240 | do_split(_LT, [SH|ST], 
241 |               ['Katakana'|TT = ['Katakana'|_]], Acc) ->
242 |     NewAcc = [SH|Acc],
243 |     do_split('Katakana', ST, TT, NewAcc);
244 | 
245 | % WB13a: (ALetter | Numeric | Katakana | ExtendNumLet) x ExtendNumLet.
246 | do_split(_LT, [SH|ST], 
247 |               [TH1|TT = ['ExtendNumLet'|_]], Acc) 
248 |     when TH1=:='ALetter'
249 |        ; TH1=:='Numeric'
250 |        ; TH1=:='Katakana'
251 |        ; TH1=:='ExtendNumLet' ->
252 |     NewAcc = [SH|Acc],
253 |     do_split(TH1, ST, TT, NewAcc);
254 | 
255 | % WB13b: ExtendNumLet x (ALetter | Numeric | Katakana).
256 | do_split(_LT, [SH|ST], 
257 |               ['ExtendNumLet'|TT = [TH2|_]], Acc) 
258 |     when TH2=:='ALetter'
259 |        ; TH2=:='Numeric'
260 |        ; TH2=:='Katakana' ->
261 |     NewAcc = [SH|Acc],
262 |     do_split('ExtendNumLet', ST, TT, NewAcc);
263 | 
264 | 
265 | 
266 | % Any: no rule above matched and more chars follow, so break after SH.
267 | do_split(_LT, 
268 |             [SH|ST], 
269 |             [TH|TT=[_|_]], Acc) ->
270 |     NewAcc = ['-',SH|Acc],
271 |     do_split(TH, ST, TT, NewAcc);
272 | 
273 | % Last char: it is pushed without a trailing '-' mark and Acc is returned.
274 | do_split(_LT, 
275 |             [SH], 
276 |             [_TH], Acc) ->
277 |    [SH|Acc];
278 | % Both lists exhausted: a '-' mark left on top of Acc is dropped.
279 | do_split(_T, [], [], ['-'|Acc]) ->
280 |    Acc;
281 | 
282 | do_split(_T, [], [], Acc) ->
283 |    Acc.
284 | 
285 |     
286 | 


--------------------------------------------------------------------------------
/start-dev.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | cd "$(dirname "$0")" || exit 1   # run from the script's directory; stop if it is unreachable
 3 | make
 4 | # NOTE: mustache templates need \ because they are not awesome.
 5 | exec erl -pa "$PWD/ebin" edit "$PWD"/deps/*/ebin -boot start_sasl \
 6 |     -sname ux \
 7 |     -s ux
 8 | #   -s reloader
 9 | 
10 | 


--------------------------------------------------------------------------------
/test/ux_break_tests.erl:
--------------------------------------------------------------------------------
  1 | -module(ux_break_tests).
  2 | 
  3 | 
  4 | -ifdef(TEST).
  5 | -include_lib("eunit/include/eunit.hrl").
  6 | 
  7 | %% Runs the grapheme break test file through ux_gb:split/2.
  8 | %% The expected value is the parsed line with every '-' mark removed.
  9 | grapheme_break_test_() ->
 10 |     Split = fun(Str) -> ux_gb:split('extended', Str) end,
 11 |     Expect = fun(Marked) -> [E || E <- Marked, E =/= '-'] end,
 12 |     %% The file is opened while the generator is evaluated; run_test/3
 13 |     %% closes it after the lines have been checked.
 14 |     Fd = ux_unidata:open_test_file('grapheme_break_test'),
 15 |     {timeout, 600,
 16 |         fun() ->
 17 |         run_test(Fd, Split, Expect)
 18 |         end}.
 19 | 
 20 | 
 21 | 
 22 | %% Runs the word break test file through ux_wb:split/1. A non-empty result
 23 | %% is wrapped in '-' marks; the expected value is the line without 'x' marks.
 24 | word_break_test_() ->
 25 |     Split = fun(Str) ->
 26 |         {_Types, Words} = ux_wb:split(Str),
 27 |         case Words of
 28 |         [] -> [];
 29 |         [_|_] -> ['-' | Words] ++ ['-']
 30 |         end
 31 |         end,
 32 |     Expect = fun(Marked) -> [E || E <- Marked, E =/= 'x'] end,
 33 |     Fd = ux_unidata:open_test_file('word_break_test'),
 34 |     {timeout, 600,
 35 |         fun() ->
 36 |         run_test(Fd, Split, Expect)
 37 |         end}.
 38 | 
 39 | 
 40 | 
 41 | 
 42 | 
 43 | 
 44 | 
 45 | 
 46 | 
 47 | 
 48 | %%
 49 | %% Helpers
 50 | %%
 51 | 
 52 | %% Turns the tokens of one test line into {CodePoints, Marked}: the
 53 | %% code points alone and the code points interleaved with the '-'/'x'
 54 | %% break marks. Both accumulators of do_parse/3 start out empty.
 55 | parse(S) ->
 56 |     do_parse(S, [], []).
 57 | 
 58 | %% Token rules: [247] (the division sign) adds a '-' mark and [215] (the
 59 | %% multiplication sign) adds an 'x' mark, both to the second list only.
 60 | %% Empty tokens are skipped. Any other token is converted with
 61 | %% ux_unidata_parser:hex_to_int/1. Both lists are returned in input order.
 62 | do_parse([], Chars, Marked) ->
 63 |     {lists:reverse(Chars), lists:reverse(Marked)};
 64 | 
 65 | do_parse([[247] | Tokens], Chars, Marked) ->
 66 |     do_parse(Tokens, Chars, ['-' | Marked]);
 67 | 
 68 | do_parse([[215] | Tokens], Chars, Marked) ->
 69 |     do_parse(Tokens, Chars, ['x' | Marked]);
 70 | 
 71 | do_parse([[] | Tokens], Chars, Marked) ->
 72 |     do_parse(Tokens, Chars, Marked);
 73 | 
 74 | do_parse([Hex | Tokens], Chars, Marked) ->
 75 |     Code = ux_unidata_parser:hex_to_int(Hex),
 76 |     true = (Code =/= false),
 77 |     %% A converted code point goes to both lists.
 78 |     do_parse(Tokens, [Code | Chars], [Code | Marked]).
 79 |     
 80 | 
 81 | %% Sets Fd to utf8 encoding, checks its lines; Fd is closed even on failure.
 82 | run_test(Fd, Fun, Fun2) ->
 83 |     io:setopts(Fd, [{encoding,utf8}]),
 84 |     try do_test(Fd, Fun, Fun2) after file:close(Fd) end.
 85 | 
 86 | %% Checks each remaining line of Fd until eof; raises on the first mismatch.
 87 | do_test(Fd, Fun, Fun2) ->
 88 |     case io:get_line(Fd, "") of
 89 |     eof -> ok;
 90 |     Line ->
 91 |         Cleaned = ux_unidata_parser:delete_spaces(
 92 |                     ux_unidata_parser:delete_comments(Line), $\t),
 93 |         {Str, StrWithDelims} = parse(ux_unidata_parser:split($ , Cleaned)),
 94 |         ?assertEqual(Fun2(StrWithDelims), Fun(Str)),
 95 |         do_test(Fd, Fun, Fun2)
 96 |     end.
 97 |     
 98 | 
 99 | 
100 | -endif.
101 |     
102 | 


--------------------------------------------------------------------------------
/test/ux_char_tests.erl:
--------------------------------------------------------------------------------
  1 | -module(ux_char_tests).
  2 | 
  3 | 
  4 | -ifdef(TEST).
  5 | -include_lib("eunit/include/eunit.hrl").
  6 | -include("ux_tests.hrl").
  7 |     
  8 | 
  9 | is_letter_test_() -> % expects ux_char:is_letter/1 to hold for the three letters only
 10 |     setup(), % ux:start/0 (defined in ux_tests.hrl)
 11 |     M = 'ux_char',
 12 |     F = 'is_letter',
 13 |     [?_assertTO(not M:F($ )) % space
 14 |     ,?_assertTO(not M:F($-)) % hyphen-minus
 15 |     ,?_assertTO(not M:F($1)) % digit
 16 |     ,?_assertTO(M:F($r)) % lower-case Latin letter
 17 |     ,?_assertTO(M:F($G)) % upper-case Latin letter
 18 |     ,?_assertTO(M:F(1099)) % U+044B, Cyrillic small letter yeru
 19 |     ].
 20 | 
 21 | is_ascii_test_() -> % expects true for the five ASCII chars, false for 1099
 22 |     setup(), % ux:start/0 (defined in ux_tests.hrl)
 23 |     M = 'ux_char',
 24 |     F = 'is_acsii', % NOTE(review): spelled "acsii", not "ascii" - confirm it matches the ux_char export
 25 |     [?_assertTO(M:F($ ))
 26 |     ,?_assertTO(M:F($-))
 27 |     ,?_assertTO(M:F($1))
 28 |     ,?_assertTO(M:F($r))
 29 |     ,?_assertTO(M:F($G))
 30 |     ,?_assertTO(not M:F(1099)) % U+044B, Cyrillic small letter yeru
 31 |     ].
 32 | 
 33 | is_lower_test_() -> % expects ux_char:is_lower/1 to hold for $r and 1099 only
 34 |     setup(), % ux:start/0 (defined in ux_tests.hrl)
 35 |     M = 'ux_char',
 36 |     F = 'is_lower',
 37 |     [?_assertTO(not M:F($ ))
 38 |     ,?_assertTO(not M:F($-))
 39 |     ,?_assertTO(not M:F($1))
 40 |     ,?_assertTO(M:F($r))
 41 |     ,?_assertTO(not M:F($G)) % upper-case letter
 42 |     ,?_assertTO(M:F(1099)) % U+044B, Cyrillic small letter yeru
 43 |     ].
 44 | 
 45 | 
 46 | is_upper_test_() -> % expects ux_char:is_upper/1 to hold for $G only
 47 |     setup(), % ux:start/0 (defined in ux_tests.hrl)
 48 |     M = 'ux_char',
 49 |     F = 'is_upper',
 50 |     [?_assertTO(not M:F($ ))
 51 |     ,?_assertTO(not M:F($-))
 52 |     ,?_assertTO(not M:F($1))
 53 |     ,?_assertTO(not M:F($r)) % lower-case letter
 54 |     ,?_assertTO(M:F($G))
 55 |     ,?_assertTO(not M:F(1099)) % U+044B, Cyrillic small letter yeru
 56 |     ].
 57 | 
 58 | is_punctuation_mark_test_() -> % expects true for $- only
 59 |     setup(), % ux:start/0 (defined in ux_tests.hrl)
 60 |     M = 'ux_char',
 61 |     F = 'is_punctuation_mark',
 62 |     [?_assertTO(not M:F($ ))
 63 |     ,?_assertTO(M:F($-)) % hyphen-minus
 64 |     ,?_assertTO(not M:F($1))
 65 |     ,?_assertTO(not M:F($r))
 66 |     ,?_assertTO(not M:F($G))
 67 |     ,?_assertTO(not M:F(1099)) % U+044B, Cyrillic small letter yeru
 68 |     ].
 69 | 
 70 | is_decimal_test_() -> % expects ux_char:is_decimal/1 to hold for $1 only
 71 |     setup(), % ux:start/0 (defined in ux_tests.hrl)
 72 |     M = 'ux_char',
 73 |     F = 'is_decimal',
 74 |     [?_assertTO(not M:F($ ))
 75 |     ,?_assertTO(not M:F($-))
 76 |     ,?_assertTO(M:F($1)) % digit
 77 |     ,?_assertTO(not M:F($r))
 78 |     ,?_assertTO(not M:F($G))
 79 |     ,?_assertTO(not M:F(1099)) % U+044B, Cyrillic small letter yeru
 80 |     ].
 81 | 
 82 | is_number_test_() -> % expects ux_char:is_number/1 to hold for $1 only
 83 |     setup(), % ux:start/0 (defined in ux_tests.hrl)
 84 |     M = 'ux_char',
 85 |     F = 'is_number',
 86 |     [?_assertTO(not M:F($ ))
 87 |     ,?_assertTO(not M:F($-))
 88 |     ,?_assertTO(M:F($1)) % digit
 89 |     ,?_assertTO(not M:F($r))
 90 |     ,?_assertTO(not M:F($G))
 91 |     ,?_assertTO(not M:F(1099)) % U+044B, Cyrillic small letter yeru
 92 |     ].
 93 | 
 94 | is_separator_test_() -> % expects ux_char:is_separator/1 to hold for the space only
 95 |     setup(), % ux:start/0 (defined in ux_tests.hrl)
 96 |     M = 'ux_char',
 97 |     F = 'is_separator',
 98 |     [?_assertTO(M:F($ )) % space
 99 |     ,?_assertTO(not M:F($-))
100 |     ,?_assertTO(not M:F($1))
101 |     ,?_assertTO(not M:F($r))
102 |     ,?_assertTO(not M:F($G))
103 |     ,?_assertTO(not M:F(1099)) % U+044B, Cyrillic small letter yeru
104 |     ].
105 | 
106 | -endif.
107 | 


--------------------------------------------------------------------------------
/test/ux_string_tests.erl:
--------------------------------------------------------------------------------
  1 | -module(ux_string_tests).
  2 | 
  3 | %%
  4 | %% Tests
  5 | %%
  6 | 
  7 | -ifdef(TEST).
  8 | -include_lib("eunit/include/eunit.hrl").
  9 | -include("ux_tests.hrl").
 10 | 
 11 | 
 12 | 
 13 | explode_test_() -> % ux_string:explode/2,3; assertions are written as (Actual, Expected)
 14 |     setup(), % ux:start/0 (defined in ux_tests.hrl)
 15 |     M = 'ux_string',
 16 |     F = 'explode',
 17 |     [?_assertEqual(M:F(":", "1:2:3"), ["1", "2", "3"])
 18 |     ,?_assertEqual(M:F(":", "aa::aa"), ["aa", "", "aa"]) % adjacent delimiters give ""
 19 |     ,?_assertEqual(M:F(":", "aa::"), ["aa", "", ""])
 20 |     ,?_assertEqual(M:F("::", "aa::aa"), ["aa", "aa"]) % two-char delimiter
 21 |     ,?_assertEqual(M:F("::", "aa:::aa"), ["aa", ":aa"])
 22 |     ,?_assertEqual(M:F("::", "aa:::"), ["aa", ":"])
 23 | 
 24 |     ,?_assertEqual(M:F([":", ";"], "aa:;:aa"), ["aa", "", "", "aa"]) % a list of delimiters
 25 |     ,?_assertEqual(M:F([";:", ";"], "aa:;:aa"), ["aa:", "aa"])
 26 | 
 27 |     ,?_assertEqual(M:F($c, "dfsawcddcs"), ["dfsaw", "dd", "s"]) % a single char as delimiter
 28 |     ,?_assertEqual(M:F($c, "dfsawcddcs",2 ), ["dfsaw", "ddcs"])
 29 | 
 30 |     ,{"Limit>0",
 31 |         ?_assertEqual(M:F("|", "one|two|three|four", 2), ["one", "two|three|four"])}
 32 | 
 33 |     ,{"Limit<0",
 34 |         [?_assertEqual(M:F("|", "one|two|three|four", -1), ["one", "two", "three"])
 35 |         ,?_assertEqual(M:F("-", "one|two|three|four", -1), [])
 36 |         ,?_assertEqual(M:F("-", "one|two|three|four"), ["one|two|three|four"])
 37 |         ]}
 38 | 
 39 |     ,?_assertEqual(M:F("-", ""), [])
 40 |     % Empty delimiter.
 41 |     % PHP behaviour: return false.
 42 |     % Erlang behaviour: throw error.
 43 |     ,{"Check an error in matching.",
 44 |         [?_assertError(function_clause, M:F("", "test"))
 45 |         ,?_assertError(function_clause, M:F("", ""))
 46 |         ,?_assertError(function_clause, M:F("", "", 0))
 47 |         ,?_assertError(function_clause, M:F("", "", 1))
 48 |         ,?_assertError(function_clause, M:F("", "", -1))
 49 |         ,?_assertError(function_clause, M:F("", "test", 0))
 50 |         ,?_assertError(function_clause, M:F("", "test", 1))
 51 |         ,?_assertError(function_clause, M:F("", "test", -1))
 52 |         ]}
 53 |     ].
 54 | 
 55 | 
 56 | 
 57 | 
 58 | 
 59 | 
 60 | 
 61 | 
 62 | 
 63 | 
 64 | 
 65 | 
 66 | 
 67 | 
 68 | 
 69 | 
 70 | 
 71 | 
 72 | 
 73 | 
 74 | 
 75 | 
 76 | 
 77 | 
 78 | 
 79 | 
 80 | 
 81 | 
 82 | %%
 83 | %% With Unidata
 84 | %%
 85 | 
 86 | to_lower_test_() -> % ux_string:to_lower/1 on ASCII text, Cyrillic text and ""
 87 |     setup(), % ux:start/0 (defined in ux_tests.hrl)
 88 |     M = 'ux_string',
 89 |     F = 'to_lower',
 90 |     [?_assertEqualTO(M:F("small BIG"), "small big")
 91 |     ,?_assertEqualTO(M:F("You want your freedom?"), 
 92 |                          "you want your freedom?")
 93 |     % Russian text: upper-case Cyrillic code points, then their lower-case forms
 94 |     ,?_assertEqualTO(M:F([1069,1056,1051,1040,1053,1043]), 
 95 |                          [1101,1088,1083,1072,1085,1075])
 96 | 
 97 |     ,?_assertEqualTO(M:F(""), "")
 98 |     ].
 99 | 
100 | to_upper_test_() -> % ux_string:to_upper/1 on ASCII text, Cyrillic text and ""
101 |     setup(), % ux:start/0, as in the sibling generators, so this one can run on its own
102 |     M = 'ux_string',
103 |     F = 'to_upper',
104 |     [?_assertEqualTO(M:F("small BIG"), "SMALL BIG")
105 |     ,?_assertEqualTO(M:F("I'm making a note here: HUGE SUCCESS."), 
106 |                          "I'M MAKING A NOTE HERE: HUGE SUCCESS.")
107 |     ,?_assertEqualTO(M:F([1101,1088,1083,1072,1085,1075]), % lower-case Cyrillic
108 |                          [1069,1056,1051,1040,1053,1043])  % the same text in upper case
109 |     ,?_assertEqualTO(M:F(""), "")
110 |     ].
111 | 
112 | delete_types_test_() -> % ux_string:delete_types/2,3: expects chars of the listed types to be dropped
113 |     setup(), % ux:start/0 (defined in ux_tests.hrl)
114 |     M = 'ux_string',
115 |     F = 'delete_types',
116 |     [?_assertEqualTO(M:F(['Ll', 'Lu'], "Tom Cat!"), " !")
117 |     ,?_assertEqualTO(M:F(['Ll'], "Tom Cat!"), "T C!")
118 |     ,?_assertEqualTO(M:F(['Po'], "Tom Cat!"), "Tom Cat")
119 |     ,{"Skip 2 chars (A,B).",
120 |         ?_assertEqualTO(M:F(['Ll'], "AaBbCc44ff", -2), "ABbCc44ff")}
121 |     ,{"Delete only 2 chars (A,B).",
122 |         ?_assertEqualTO(M:F(['Ll'], "AaBbCc44ff",  2), "ABCc44ff")}
123 |     ,?_assertEqualTO(M:F(['Ll'], "AaBbCc44ffdsBAF",  4), "ABC44fdsBAF")
124 |     ,?_assertEqualTO(M:F(['Ll'], "AaBbCc44ffdsBAF", -4), "ABC44ffdsBAF")
125 | 
126 |     ,?_assertEqualTO(M:F(['Ll'], "cat"), "") % every char is of a listed type
127 |     ,?_assertEqualTO(M:F(['Ll'], ""), "")
128 |     ,?_assertEqualTO(M:F([], ""), "")
129 |     ].
130 | 
131 | filter_types_test_() -> % ux_string:filter_types/2,3: expects only chars of the listed types to be kept
132 |     setup(), % ux:start/0 (defined in ux_tests.hrl)
133 |     M = 'ux_string',
134 |     F = 'filter_types',
135 |     [?_assertEqualTO(M:F(['Ll', 'Lu'], "Tom Cat!"), "TomCat")
136 |     ,?_assertEqualTO(M:F(['Ll'], "Tom Cat!"), "omat")
137 |     ,?_assertEqualTO(M:F(['Po'], "Tom Cat!"), "!")
138 |     ,?_assertEqualTO(M:F(['Ll'], "AaBbCc44ffds",  3), "abc44ffds")
139 |     ,?_assertEqualTO(M:F(['Ll'], "AaBbCc44ffds",  4), "abcffds")
140 |     ,?_assertEqualTO(M:F(['Ll'], "AaBbCc44ffds", -2), "abCc44ffds")
141 |     ,?_assertEqualTO(M:F(['Ll'], "AaBbCc44ffds", -4), "abc4ffds")
142 | 
143 |     ,?_assertEqualTO(M:F(['Lu'], "cat"), "") % no char is of a listed type
144 |     ,?_assertEqualTO(M:F(['Lu'], ""), "")
145 |     ,?_assertEqualTO(M:F([], ""), "")
146 |     ].
147 | 
148 | types_test_() -> % ux_string:types/1: expects one type atom per char of the string
149 |     setup(), % ux:start/0 (defined in ux_tests.hrl)
150 |     M = 'ux_string',
151 |     F = 'types',
152 |     [?_assertEqualTO(M:F("Tom Cat!"), ['Lu','Ll','Ll','Zs','Lu','Ll','Ll','Po'])
153 |     ,?_assertEqualTO(M:F(""), [])
154 |     %,?_assertEqual(M:F(), )
155 |     ].
156 | 
157 | last_types_test_() -> % ux_string:last_types/3, called with a type list, a string and a negative count
158 |     setup(), % ux:start/0 (defined in ux_tests.hrl)
159 |     M = 'ux_string',
160 |     F = 'last_types',
161 |     [?_assertEqualTO(M:F(['Ll'], "AavbfFDsdfffd9s9999", -5), "99999")
162 |     ,?_assertEqualTO(M:F(['Ll'], "AavbfFDsdfffd9s9999", -6), "D99999")
163 |     ,?_assertEqualTO(M:F(['Ll'], "AavbfFDsdfffd9s9999", -7), "FD99999")
164 |     ,?_assertEqualTO(M:F(['Ll'], "AavbfFDsdfffd9s9999", -8), "AFD99999")
165 |     ,?_assertEqualTO(M:F([], "", -5), []) % empty input
166 |     ].
167 | 
168 | first_types_test_() -> % ux_string:first_types/3, called with a type list, a string and a positive count
169 |     setup(), % ux:start/0 (defined in ux_tests.hrl)
170 |     M = 'ux_string',
171 |     F = 'first_types',
172 |     [?_assertEqualTO(M:F(['Ll'], "AavbfFDsdfffds", 4), "avbf")
173 |     ,?_assertEqualTO(M:F(['Ll'], "AavbfFDsdfffds", 5), "avbfs")
174 |     ,?_assertEqualTO(M:F([], "", 5), []) % empty input
175 |     ].
176 | 
177 | 
178 | 
179 | 
180 | to_graphemes_test_() -> % ux_string:to_graphemes/1: expects a list with one string per grapheme
181 |     setup(), % ux:start/0 (defined in ux_tests.hrl)
182 |     M = 'ux_string',
183 |     F = 'to_graphemes',
184 |     [{"Simple example", 
185 |         ?_assertEqualTO(M:F("Octocat!"), ["O","c","t","o","c","a","t","!"])},
186 |      {"U+1EE5 LATIN SMALL LETTER U WITH DOT BELOW, U+031B COMBINING HORN, a, b",
187 |         ?_assertEqualTO(M:F([16#1EE5, 16#031B, $a, $b]), [[7909,795],"a","b"])}, % 7909 = 16#1EE5, 795 = 16#031B
188 |      ?_assertEqualTO(M:F(""), [])
189 |     ].
190 | 
191 | first_test_() -> % ux_string:first/2 with a count of 4
192 |     setup(), % ux:start/0 (defined in ux_tests.hrl)
193 |     M = 'ux_string',
194 |     F = 'first',
195 |     [?_assertEqualTO(M:F("Octocat!", 4), "Octo")
196 |     ,?_assertEqualTO(M:F("", 4), "")
197 |     ,?_assertEqualTO(M:F("cat", 4), "cat") % string shorter than the count
198 |     ].
199 | 
200 | last_test_() -> % ux_string:last/2 with a count of 4
201 |     setup(), % ux:start/0 (defined in ux_tests.hrl)
202 |     M = 'ux_string',
203 |     F = 'last',
204 |     [?_assertEqualTO(M:F("Octocat!", 4), "cat!")
205 |     ,?_assertEqualTO(M:F("", 4), "")
206 |     ,?_assertEqualTO(M:F("cat", 4), "cat") % string shorter than the count
207 |     ].
208 | 
209 | length_test_() -> % ux_string:length/1 on a four-char string and on ""
210 |     setup(), % ux:start/0 (defined in ux_tests.hrl)
211 |     M = 'ux_string',
212 |     F = 'length',
213 |     [?_assertEqualTO(M:F("Octo"), 4)
214 |     ,?_assertEqualTO(M:F(""), 0)
215 |     ].
216 | 
217 | 
218 | script_test_() -> % ux_string:script/1: expects one script atom, or false for ""
219 |     setup(), % ux:start/0 (defined in ux_tests.hrl)
220 |     M = 'ux_string',
221 |     F = 'script',
222 |     [?_assertEqualTO(M:F("Octocat!"), 'Latin')
223 |     ,?_assertEqualTO(M:F([1086,1082,1090,1086,1082,1101,1090]), 'Cyrillic') % code points U+043E..U+044D
224 |     ,?_assertEqualTO(M:F(""), false)
225 |     ].
226 | 
227 | 
228 | 
229 | scripts_test_() -> % ux_string:scripts/1: expects the list of scripts found in the string
230 |     setup(), % ux:start/0 (defined in ux_tests.hrl)
231 |     M = 'ux_string',
232 |     F = 'scripts',
233 |     S = fun lists:sort/1, % both sides are sorted, so the order of the result does not matter
234 |     [?_assertEqualTO(S(M:F("Octocat!")), S(['Latin','Common']))
235 |     ,?_assertEqualTO(M:F([1086,1082,1090,1086,1082,1101,1090]), ['Cyrillic'])
236 |     ,?_assertEqualTO(M:F(""), [])
237 |     ].
238 | 
239 | 
240 | 
241 | to_nfc_test_() -> % only checks that ux_string:to_nfc/1 maps "" to ""
242 |     setup(), % ux:start/0 (defined in ux_tests.hrl)
243 |     M = 'ux_string',
244 |     F = 'to_nfc',
245 |     [?_assertEqualTO(M:F(""), "")
246 |     ].
247 | 
248 | to_nfd_test_() -> % only checks that ux_string:to_nfd/1 maps "" to ""
249 |     setup(), % ux:start/0 (defined in ux_tests.hrl)
250 |     M = 'ux_string',
251 |     F = 'to_nfd',
252 |     [?_assertEqualTO(M:F(""), "")
253 |     ].
254 | 
255 | to_nfkc_test_() -> % only checks that ux_string:to_nfkc/1 maps "" to ""
256 |     setup(), % ux:start/0 (defined in ux_tests.hrl)
257 |     M = 'ux_string',
258 |     F = 'to_nfkc',
259 |     [?_assertEqualTO(M:F(""), "")
260 |     ].
261 | 
262 | to_nfkd_test_() -> % only checks that ux_string:to_nfkd/1 maps "" to ""
263 |     setup(), % ux:start/0 (defined in ux_tests.hrl)
264 |     M = 'ux_string',
265 |     F = 'to_nfkd',
266 |     [?_assertEqualTO(M:F(""), "")
267 |     ].
268 | 
269 | 
270 | 
271 | 
272 | 
273 | is_nfc_test_() -> % only checks that ux_string:is_nfc/1 answers 'yes' for ""
274 |     setup(), % ux:start/0 (defined in ux_tests.hrl)
275 |     M = 'ux_string',
276 |     F = 'is_nfc',
277 |     [?_assertEqualTO(M:F(""), 'yes')
278 |     ].
279 | 
280 | is_nfd_test_() -> % only checks that ux_string:is_nfd/1 answers 'yes' for ""
281 |     setup(), % ux:start/0 (defined in ux_tests.hrl)
282 |     M = 'ux_string',
283 |     F = 'is_nfd',
284 |     [?_assertEqualTO(M:F(""), 'yes')
285 |     ].
286 | 
287 | is_nfkc_test_() -> % only checks that ux_string:is_nfkc/1 answers 'yes' for ""
288 |     setup(), % ux:start/0 (defined in ux_tests.hrl)
289 |     M = 'ux_string',
290 |     F = 'is_nfkc',
291 |     [?_assertEqualTO(M:F(""), 'yes')
292 |     ].
293 | 
294 | is_nfkd_test_() -> % only checks that ux_string:is_nfkd/1 answers 'yes' for ""
295 |     setup(), % ux:start/0, as in the sibling generators, so this one can run on its own
296 |     M = 'ux_string',
297 |     F = 'is_nfkd',
298 |     [?_assertEqualTO(M:F(""), 'yes')
299 |     ].
300 | 
301 | 
302 | 
303 | 
304 | 
305 | 
306 | 
307 | 
308 | 
309 | 
310 | 
311 | 
312 | 
313 | 
314 | 
315 | -ifdef(SLOW_TESTS).
316 | 
317 | nfc_test_() -> % compiled only when SLOW_TESTS is defined
318 |     setup(), % ux:start/0 (defined in ux_tests.hrl)
319 |     {timeout, 300, 
320 |         {"Normalization Conformance Test", 
321 |             fun() -> 
322 |                 nfc_prof(1000000), % upper bound on the number of lines read
323 |                 io:format(user, "~n", []) end}}.
324 | 
325 | 
326 | 
327 | 
328 | %% @doc Normalization Conformance Test
329 | %% http://unicode.org/reports/tr41/tr41-7.html#Tests15
330 | %% Reads Fd line by line; c1..c5 are the first five ';'-separated fields.
331 | %%    NFC
332 | %%      c2 ==  NFC(c1) ==  NFC(c2) ==  NFC(c3)
333 | %%      c4 ==  NFC(c4) ==  NFC(c5)
334 | %%
335 | %%    NFD
336 | %%      c3 ==  NFD(c1) ==  NFD(c2) ==  NFD(c3)
337 | %%      c5 ==  NFD(c4) ==  NFD(c5)
338 | %%
339 | %%    NFKC
340 | %%      c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)
341 | %%
342 | %%    NFKD
343 | %%      c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)
344 | %% @end
345 | %% @private
346 | nfc_test(_Fd, 0, StrNum) -> % the line budget (Max) is used up
347 |     io:format(user, "Only ~w strings were tested. Exit.~n", [StrNum]),
348 |     ok;
349 | nfc_test(Fd, Max, StrNum) ->
350 |     % Show a progress message every 1000 lines
351 |     case StrNum rem 1000 of
352 |     0 -> io:format(user, "~n~w strings were tested. ", [StrNum]);
353 |     _ -> next
354 |     end,
355 | 
356 |     NFC  = fun ux_string:to_nfc/1,
357 |     NFD  = fun ux_string:to_nfd/1,
358 |     NFKC = fun ux_string:to_nfkc/1,
359 |     NFKD = fun ux_string:to_nfkd/1,
360 | 
361 |     case file:read_line(Fd) of
362 |     eof -> ok;
363 |     {ok, Data} -> 
364 |         try % only the parsing between 'try' and 'of' is covered by the catch below
365 |         [LineWithoutComment|_] = ux_string:explode("#", Data),
366 |         % Convert string from file to list of integers 
367 |         lists:map(fun (Str) -> 
368 |                 lists:map(fun ux_unidata_parser:hex_to_int/1, 
369 |                     string:tokens(Str, " ")) 
370 |             end,
371 |             ux_string:explode(";", LineWithoutComment))
372 |         of 
373 |         [C1,C2,C3,C4,C5,_] ->
374 |             % start of the body
375 |             % {Test info atom, Result from function, From, To}
376 |             %NFD
377 |             ?assertEqual({c3__nfd_c1, C3, C1, C3}, {c3__nfd_c1, NFD(C1), C1, C3}),
378 |             ?assertEqual({c3__nfd_c2, C3, C2, C3}, {c3__nfd_c2, NFD(C2), C2, C3}),
379 |             ?assertEqual({c3__nfd_c3, C3, C3, C3}, {c3__nfd_c3, NFD(C3), C3, C3}),
380 |             ?assertEqual({c3__nfd_c4, C5, C4, C5}, {c3__nfd_c4, NFD(C4), C4, C5}), % label says c3; expected value is C5
381 |             ?assertEqual({c3__nfd_c5, C5, C5, C5}, {c3__nfd_c5, NFD(C5), C5, C5}),
382 | 
383 |             %NFC
384 |             ?assertEqual({c2__nfc_c1, C2, C1, C2}, {c2__nfc_c1, NFC(C1), C1, C2}),
385 |             ?assertEqual({c2__nfc_c2, C2, C2, C2}, {c2__nfc_c2, NFC(C2), C2, C2}),
386 |             ?assertEqual({c2__nfc_c3, C2, C3, C2}, {c2__nfc_c3, NFC(C3), C3, C2}),
387 |             ?assertEqual({c2__nfc_c4, C4, C4, C4}, {c2__nfc_c4, NFC(C4), C4, C4}), % label says c2; expected value is C4
388 |             ?assertEqual({c2__nfc_c5, C4, C5, C4}, {c2__nfc_c5, NFC(C5), C5, C4}),
389 | 
390 |             %NFKC
391 |             ?assertEqual({c4__nfkc_c1, C4, C1}, {c4__nfkc_c1, NFKC(C1), C1}),
392 |             ?assertEqual({c4__nfkc_c2, C4, C2}, {c4__nfkc_c2, NFKC(C2), C2}),
393 |             ?assertEqual({c4__nfkc_c3, C4, C3}, {c4__nfkc_c3, NFKC(C3), C3}),
394 |             ?assertEqual({c4__nfkc_c4, C4, C4}, {c4__nfkc_c4, NFKC(C4), C4}),
395 |             ?assertEqual({c4__nfkc_c5, C4, C5}, {c4__nfkc_c5, NFKC(C5), C5}),
396 | 
397 |             %NFKD
398 |             ?assertEqual({c5__nfkd_c1, C5, C1}, {c5__nfkd_c1, NFKD(C1), C1}),
399 |             ?assertEqual({c5__nfkd_c2, C5, C2}, {c5__nfkd_c2, NFKD(C2), C2}),
400 |             ?assertEqual({c5__nfkd_c3, C5, C3}, {c5__nfkd_c3, NFKD(C3), C3}),
401 |             ?assertEqual({c5__nfkd_c4, C5, C4}, {c5__nfkd_c4, NFKD(C4), C4}),
402 |             ?assertEqual({c5__nfkd_c5, C5, C5}, {c5__nfkd_c5, NFKD(C5), C5});
403 | 
404 |             % end of the body
405 |         _ -> next % the line did not split into exactly six fields: skip it
406 |         catch error:_ -> next % the line could not be parsed: skip it
407 |         after % runs in every case, so the remaining lines are read even after a failed assertion
408 |             nfc_test(Fd, Max - 1, StrNum + 1)
409 |         end
410 |     end.
411 | 
412 | %% Checks at most Count lines of the normalization test file; always closes it.
413 | nfc_prof(Count) ->
414 |     Fd = ux_unidata:open_test_file('normalization_test'),
415 |     io:setopts(Fd,[{encoding,utf8}]),
416 |     try nfc_test(Fd, Count, 0) after file:close(Fd) end,
417 |     ok.
418 | 
419 | 
420 | -endif. % SLOW_TESTS
421 | -endif. % TEST
422 | 


--------------------------------------------------------------------------------
/test/ux_tests.hrl:
--------------------------------------------------------------------------------
 1 | -include("../src/ux.hrl").
 2 | 
 3 | -define(TO(X), {'timeout', 60, X}). % wraps test X in an EUnit timeout tuple with limit 60
 4 | -define(_testTO(X), % ?_test(X) wrapped in ?TO
 5 |     ?TO(?_test(X))).
 6 | -define(_assertTO(X), % ?_assert(X) wrapped in ?TO
 7 |     ?TO(?_assert(X))).
 8 | -define(_assertEqualTO(X, Y), % ?_assertEqual(X, Y) wrapped in ?TO
 9 |     ?TO(?_assertEqual(X,Y))).
10 | %% Starts ux via ux:start/0; the test generators call this first.
11 | setup() -> ux:start().
12 | 


--------------------------------------------------------------------------------
/testing/UCA/CollationTest/CollationTest_NON_IGNORABLE_SHORT.txt.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/erlang-unicode/ux/0ccdba1f1acb1545e32693215d5b2a50879e217e/testing/UCA/CollationTest/CollationTest_NON_IGNORABLE_SHORT.txt.gz


--------------------------------------------------------------------------------
/testing/UCA/CollationTest/CollationTest_SHIFTED_SHORT.txt.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/erlang-unicode/ux/0ccdba1f1acb1545e32693215d5b2a50879e217e/testing/UCA/CollationTest/CollationTest_SHIFTED_SHORT.txt.gz


--------------------------------------------------------------------------------
/testing/UNIDATA/NormalizationTest.txt.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/erlang-unicode/ux/0ccdba1f1acb1545e32693215d5b2a50879e217e/testing/UNIDATA/NormalizationTest.txt.gz


--------------------------------------------------------------------------------
/ux_test.cfg:
--------------------------------------------------------------------------------
1 | {ux_common_config, []}.                      
2 | 


--------------------------------------------------------------------------------
/ux_test.spec:
--------------------------------------------------------------------------------
1 | {logdir, "logs"}.
2 | {config, "ux_test.cfg"}.
3 | {alias, common_tests, "common_tests"}.
4 | {cover, "common_tests/ux.cover"}.
5 | {suites, common_tests, ux_SUITE}.
6 | 


--------------------------------------------------------------------------------