├── src ├── unistring_app.erl └── unistring.app.src ├── .gitignore ├── rebar.config ├── Makefile ├── test └── unistring_tests.erl └── README.md /src/unistring_app.erl: -------------------------------------------------------------------------------- 1 | -module(unistring_app). 2 | -vsn("0.1.0"). -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .eunit 2 | deps 3 | *.o 4 | *.beam 5 | *.plt 6 | *.dump 7 | ebin/*.* -------------------------------------------------------------------------------- /rebar.config: -------------------------------------------------------------------------------- 1 | {erl_opts, [fail_on_warning, debug_info]}. 2 | {cover_enabled, true}. 3 | {clean_files, ["*.eunit", "ebin/*.beam"]}. -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | REBAR:=rebar 2 | 3 | .PHONY: all erl test clean doc 4 | 5 | all: erl 6 | 7 | erl: 8 | $(REBAR) get-deps compile 9 | 10 | test: all 11 | @mkdir -p .eunit 12 | $(REBAR) skip_deps=true eunit 13 | 14 | clean: 15 | $(REBAR) clean 16 | -rm -rvf deps ebin doc .eunit 17 | 18 | doc: 19 | $(REBAR) doc 20 | -------------------------------------------------------------------------------- /src/unistring.app.src: -------------------------------------------------------------------------------- 1 | {application, unistring, 2 | [ 3 | {description, "Unicode utf-8 functions"}, 4 | {vsn, "0.1.0"}, 5 | {registered, []}, 6 | {applications, [ 7 | kernel, 8 | stdlib 9 | ]}, 10 | %% No 'mod' entry: unistring_app defines no start/2 or stop/1 callbacks, so unistring is a library application. 11 | {env, []} 12 | ]}. 13 | -------------------------------------------------------------------------------- /test/unistring_tests.erl: -------------------------------------------------------------------------------- 1 | %% -*- coding: utf-8 -*- 2 | -module(unistring_tests). 
3 | -include_lib("eunit/include/eunit.hrl"). 4 | 5 | to_lower_test() -> 6 | ?assertEqual("test", unistring:to_lower("TEST")), 7 | ?assertEqual(<<"test">>, unistring:to_lower(<<"TEST">>)), 8 | ?assertEqual("привет", unistring:to_lower("ПРИВЕТ")), 9 | ?assertEqual(<<"привет"/utf8>>, unistring:to_lower(<<"ПРИВЕТ"/utf8>>)). 10 | 11 | to_upper_test() -> 12 | ?assertEqual("TEST", unistring:to_upper("test")), 13 | ?assertEqual(<<"TEST">>, unistring:to_upper(<<"test">>)), 14 | ?assertEqual("ПРИВЕТ", unistring:to_upper("привет")), 15 | ?assertEqual(<<"ПРИВЕТ"/utf8>>, unistring:to_upper(<<"привет"/utf8>>)). -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | unistring 2 | ========= 3 | 4 | ## Why 5 | Starting with Erlang **R16B**, the [string module](http://www.erlang.org/doc/man/string.html) in the standard library handles Unicode UTF-8 strings very well, except that ```string:to_upper/1``` and ```string:to_lower/1``` ignore Unicode code points > 255. 6 | 7 | The unistring library uses the hexadecimal values from the Lowercase Mapping, Uppercase Mapping and Titlecase Mapping columns of [UnicodeData.txt](ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt). 8 | 9 | ## License 10 | Public Domain 11 | 12 | ## API 13 | Unicode UTF-8 functions for Erlang: 14 | 15 | ```erlang 16 | unistring:to_lower/1 17 | unistring:to_upper/1 18 | unistring:to_title/1 19 | ``` 20 | 21 | Input to functions can be in ```charlist()``` or ```unicode_binary()``` format. 22 | 23 | charlist() is a list of Unicode code points; for example, ```[50504,45397,54616,49464,50836]``` means ```"안녕하세요"```. 24 | unicode_binary() is a binary string with UTF-8 encoding; for example, ```<<236,149,136,235,133,149,237,149,152,236,132,184,236,154,148>>``` means ```"안녕하세요"```. 
25 | 26 | charlist() and unicode_binary() can be converted from one to the other using the [unicode module](http://www.erlang.org/doc/man/unicode.html) in Erlang **R16B** or later. 27 | 28 | ## Examples 29 | Make sure to start your erl shell with ```erl +pc unicode```. 30 | Make sure to add ```%% -*- coding: utf-8 -*-``` to the beginning of your ```*.erl``` source code files if you will be using Unicode characters inside of quotes. 31 | If you are creating Unicode binary strings, make sure to append **/utf8** after the string's end quote mark inside of <<>>, such as: ```<<"안녕하세요"/utf8>>```. 32 | 33 | ```erlang 34 | %% unistring:to_lower/1 35 | 36 | "test" = unistring:to_lower("TEST"). 37 | <<"test">> = unistring:to_lower(<<"TEST">>). 38 | "привет" = unistring:to_lower("ПРИВЕТ"). 39 | <<"привет"/utf8>> = unistring:to_lower(<<"ПРИВЕТ"/utf8>>). 40 | 41 | %% unistring:to_upper/1 42 | "TEST" = unistring:to_upper("test"). 43 | <<"TEST">> = unistring:to_upper(<<"test">>). 44 | "ПРИВЕТ" = unistring:to_upper("привет"). 45 | <<"ПРИВЕТ"/utf8>> = unistring:to_upper(<<"привет"/utf8>>). 46 | ``` 47 | 48 | ## Note 49 | I don't really see the point of ```unistring:to_title/1``` but it is included in this library for completeness. --------------------------------------------------------------------------------