├── .gitignore ├── CMakeLists.txt ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── extlib ├── UTF8-CPP │ └── include │ │ └── utf8 │ │ ├── checked.h │ │ ├── core.h │ │ ├── unchecked.h │ │ └── utf8.h └── u5e │ ├── LICENSE │ ├── include │ └── u5e │ │ ├── basic_encodedstring.hpp │ │ ├── basic_grapheme.hpp │ │ ├── basic_grapheme_iterator.hpp │ │ ├── canonical_combining_order.hpp │ │ ├── canonical_composition.hpp │ │ ├── canonical_decomposition.hpp │ │ ├── codepoint.hpp │ │ ├── codepoint_decomposition.hpp │ │ ├── codepoint_traits.hpp │ │ ├── compatibility_and_canonical_decomposition.hpp │ │ ├── encoding_assertion.hpp │ │ ├── filter.hpp │ │ ├── iterator_assertion.hpp │ │ ├── normalization_form_c.hpp │ │ ├── normalization_form_d.hpp │ │ ├── normalization_form_kc.hpp │ │ ├── normalization_form_kd.hpp │ │ ├── props │ │ ├── canonical_combining_class.hpp │ │ ├── canonical_composition_mapping.hpp │ │ ├── canonical_decomposition_mapping.hpp │ │ ├── compatibility_and_canonical_decomposition_mapping.hpp │ │ └── grapheme_cluster_break.hpp │ │ ├── utf32ne.hpp │ │ ├── utf32ne_string.hpp │ │ ├── utf32ne_string_grapheme.hpp │ │ ├── utf32ne_string_grapheme_iterator.hpp │ │ ├── utf32ne_string_view.hpp │ │ ├── utf32ne_string_view_grapheme.hpp │ │ ├── utf32ne_string_view_grapheme_iterator.hpp │ │ ├── utf8.hpp │ │ ├── utf8_bounds.hpp │ │ ├── utf8_iterator.hpp │ │ ├── utf8_string.hpp │ │ ├── utf8_string_grapheme.hpp │ │ ├── utf8_string_grapheme_iterator.hpp │ │ ├── utf8_string_view.hpp │ │ ├── utf8_string_view_grapheme.hpp │ │ ├── utf8_string_view_grapheme_iterator.hpp │ │ ├── utf8_util.hpp │ │ └── version.hpp │ └── src │ └── u5e │ ├── props │ ├── CompositionExclusions.txt │ ├── GraphemeBreakProperty.txt │ ├── UnicodeData.txt │ ├── canonical_combining_class.cpp │ ├── canonical_combining_class_data.hpp │ ├── canonical_combining_class_data.pl │ ├── canonical_composition_mapping.cpp │ ├── canonical_composition_mapping_data.hpp │ ├── canonical_composition_mapping_data.pl │ ├── 
canonical_decomposition_mapping.cpp │ ├── canonical_decomposition_mapping_data.hpp │ ├── canonical_decomposition_mapping_data.pl │ ├── compatibility_and_canonical_decomposition_mapping.cpp │ ├── compatibility_and_canonical_decomposition_mapping_data.hpp │ ├── compatibility_and_canonical_decomposition_mapping_data.pl │ ├── grapheme_cluster_break.cpp │ ├── grapheme_cluster_break_data.hpp │ └── grapheme_cluster_break_data.sh │ └── version.cpp ├── include └── Aheuiplusplus │ ├── Aheuiplusplus.hpp │ ├── code.hpp │ ├── command_line.hpp │ ├── cursor.hpp │ ├── debugger.hpp │ ├── element.hpp │ ├── extension.hpp │ ├── function.hpp │ ├── interpreter.hpp │ ├── mode.hpp │ ├── namespace.hpp │ ├── storage.hpp │ └── version.hpp └── src ├── command_line.cpp ├── cursor.cpp ├── debugger.cpp ├── element.cpp ├── extension.cpp ├── function.cpp ├── interpreter.cpp ├── main.cpp ├── mode.cpp ├── namespace.cpp ├── storage.cpp └── version_.cpp /.gitignore: -------------------------------------------------------------------------------- 1 | bin/ 2 | test*.* 3 | 4 | # CMake 5 | CMakeFiles/ 6 | CMakeScripts/ 7 | CMakeCache.txt 8 | cmake_install.cmake 9 | install_manifest.txt 10 | compile_commands.json 11 | CTestTestfile.cmake 12 | 13 | # Make 14 | Makefile 15 | 16 | # Visual Studio 17 | .vs/ 18 | *.sln 19 | *.vcxproj 20 | *.filters 21 | *.user -------------------------------------------------------------------------------- /CMakeLists.txt: --------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.8.0)
2 | project(Aheuiplusplus CXX)
3 |
4 | set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
5 | set(CMAKE_CXX_STANDARD 17)
6 | set(CMAKE_CXX_STANDARD_REQUIRED ON)
7 | set(CMAKE_CXX_EXTENSIONS OFF)
8 |
9 | set(INCLUDE_DIR "./include")
10 | set(SOURCE_DIR "./src")
11 | set(OUTPUT_DIR "./bin")
12 | set(EXTLIB_DIR "./extlib")
13 |
14 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${OUTPUT_DIR})
15 |
16 | # Cache
17 | ## COMPILE_TARGET
18 | set(COMPILE_TARGET "Executable" CACHE STRING "아희++을 어떤 형태로 컴파일 할지 설정합니다.")
19 | set_property(CACHE COMPILE_TARGET PROPERTY STRINGS "Executable" "Library")
20 | # Normalize: lower-case the value, then expand the abbreviations (e/exe, l/lib). Expansions are quoted so an empty or unusual value is compared as plain text.
21 | string(TOLOWER "${COMPILE_TARGET}" COMPILE_TARGET)
22 | if("${COMPILE_TARGET}" STREQUAL "e" OR "${COMPILE_TARGET}" STREQUAL "exe")
23 |     set(COMPILE_TARGET "executable")
24 | elseif("${COMPILE_TARGET}" STREQUAL "l" OR "${COMPILE_TARGET}" STREQUAL "lib")
25 |     set(COMPILE_TARGET "library")
26 | endif()
27 |
28 | ## USE_EXTENSION
29 | set(USE_EXTENSION ON CACHE BOOL "아희++ 표준 인터프리터 확장을 사용할지 설정합니다.")
30 |
31 | ## PRINT_BENCHMARK
32 | set(PRINT_BENCHMARK OFF CACHE BOOL "아희++ 표준 인터프리터의 성능을 출력할지 설정합니다.")
33 |
34 | # Searching files
35 | ## Header files
36 | include_directories(${INCLUDE_DIR})
37 | include_directories(${EXTLIB_DIR}/u5e/include)
38 |
39 | ## Source files
40 | file(GLOB SOURCE_LIST ${SOURCE_DIR}/*.cpp)
41 | file(GLOB EXTLIB_U5E_SOURCE_LIST ${EXTLIB_DIR}/u5e/src/u5e/*.cpp ${EXTLIB_DIR}/u5e/src/u5e/props/*.cpp)
42 |
43 | # Compilation
44 | add_definitions(-D__STDC_CONSTANT_MACROS)
45 | add_definitions(-D__STDC_LIMIT_MACROS)
46 |
47 | # Create the target. AHEUIPLUSPLUS_TARGET is defined as 1 for the executable and 2 for the static library; any other COMPILE_TARGET value aborts configuration instead of silently defining no target.
48 | if("${COMPILE_TARGET}" STREQUAL "executable")
49 |     add_definitions(-DAHEUIPLUSPLUS_TARGET=1)
50 |     add_executable(${PROJECT_NAME} ${SOURCE_LIST} ${EXTLIB_U5E_SOURCE_LIST})
51 | elseif("${COMPILE_TARGET}" STREQUAL "library")
52 |     add_definitions(-DAHEUIPLUSPLUS_TARGET=2)
53 |     add_library(${PROJECT_NAME} STATIC ${SOURCE_LIST} ${EXTLIB_U5E_SOURCE_LIST})
54 | else()
55 |     message(FATAL_ERROR "COMPILE_TARGET must be Executable (E, Exe) or Library (L, Lib), but got '${COMPILE_TARGET}'.")
56 | endif()
57 |
58 | if(USE_EXTENSION)
59 |     add_definitions(-DAHEUIPLUSPLUS_USE_EXTENSION)
60 | endif()
61 | if(PRINT_BENCHMARK)
62 |     add_definitions(-DAHEUIPLUSPLUS_PRINT_BENCHMARK)
63 | endif()
64 |
65 | # Installation (COMPILE_TARGET is exactly "executable" or "library" here; the condition stays verbatim because the closing endif() repeats it)
66 | if(${COMPILE_TARGET} STREQUAL "executable")
67 |     install(TARGETS ${PROJECT_NAME} DESTINATION bin)
68 | elseif(${COMPILE_TARGET} STREQUAL "library")
69 |     install(TARGETS
${PROJECT_NAME} DESTINATION lib) 70 | 71 | ## Header files 72 | file(GLOB HEADER_LIST ${INCLUDE_DIR}/Aheuiplusplus/*.hpp) 73 | file(GLOB EXTLIB_U5E_HEADER_LIST ${EXTLIB_DIR}/u5e/include/u5e/*.hpp) 74 | file(GLOB EXTLIB_U5E_PROPS_HEADER_LIST ${EXTLIB_DIR}/u5e/include/u5e/props/*.hpp) 75 | file(GLOB EXTLIB_UTF8_CPP_HEADER_LIST ${EXTLIB_DIR}/UTF8-CPP/include/utf8/*.h) 76 | 77 | install(FILES ${HEADER_LIST} DESTINATION include/Aheuiplusplus) 78 | install(FILES ${EXTLIB_U5E_HEADER_LIST} DESTINATION include/u5e) 79 | install(FILES ${EXTLIB_U5E_PROPS_HEADER_LIST} DESTINATION include/u5e/props) 80 | install(FILES ${EXTLIB_UTF8_CPP_HEADER_LIST} DESTINATION include/utf8) 81 | endif(${COMPILE_TARGET} STREQUAL "executable") -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # 아희++ 표준 인터프리터에 대한 기여에 관한 작은 규칙 2 | 최신 버전이 아닐 수 있습니다. 최신 버전은 [이곳](https://github.com/kmc7468/Aheuiplusplus/blob/master/CONTRIBUTING.md)에서 확인할 수 있습니다. 3 | ## 기본적인 예의 4 | - 커밋 제목 및 메세지, 커밋에 포함된 기여 내용 등 모든 부분에서 기본적인 예의를 준수하여 주십시오. 5 | - 예를 들어, 비존대어를 사용하거나, 비속어를 사용하지 마십시오. 6 | - 커밋 제목 및 메세지, 커밋에 포함된 기여 내용 등 모든 부분에서 인권 및 기여자의 소속 국가의 법률을 준수하여 주십시오. 7 | - 예를 들어, 라이선스 문제가 있는 소스 코드를 사용하거나, 커밋 메세지에 차별적 표현을 사용하지 마십시오. 8 | ## 파일 9 | - 모든 텍스트 파일은 **BOM이 있는 UTF-8**로 인코딩 해 주십시오. 10 | - 모든 헤더 파일 및 소스 파일의 줄바꿈 형식은 **CRLF**(`"\r\n"`)로 해주십시오. 11 | ## 커밋 12 | - 커밋 제목 및 메세지는 반드시 **한국어의 표준어**로 작성해 주십시오. 13 | - 커밋 제목은 동사로 끝나는 명사형(예: ~ 수정, ~ 업데이트, ~ 개선 등)을 사용해 주십시오. 14 | - 반드시 master 브랜치에만 커밋하여 주십시오. 15 | ## PR과 이슈 16 | - 반드시 master 브랜치에 대하여 열어 주십시오. 17 | - PR은 반드시 수락되는 것은 아님에 유의하십시오. 18 | ## 프로그래밍 19 | - 소스 코드는 반드시 크로스 플랫폼이 가능하도록 프로그래밍 하여 주십시오. 20 | ## 브랜치 21 | - **master 브랜치**
22 | 주 브랜치로, 모든 커밋은 반드시 이 브랜치에만 해야 합니다. 23 | - **stable 브랜치**
24 | master 브랜치에서 버전의 개발이 완료되어 릴리즈를 할 준비가 완료되었으며, 정식 출시가 가능한 안정화 된 버전일 경우 master 브랜치에서 stable 브랜치로 커밋을 병합합니다. 25 | - **pre-release 브랜치**
 26 | master 브랜치에서 버전의 개발이 완료되어 릴리즈를 할 준비가 완료되었으나, 정식 출시 전 프리릴리즈일 경우 master 브랜치에서 pre-release 브랜치로 커밋을 병합합니다. 27 | ### 브랜칭 전략 28 | - 신규 기능을 구현할 때, 작업이 오래 걸릴 것으로 보이는 기능을 구현한다면 `"feature/(기능 이름)"`의 이름을 가진 브랜치를 master 브랜치에서 분기시킬 수 있습니다. 작업이 끝난 후에는 다시 master 브랜치로 병합시켜야 합니다. 병합을 한 후에는 브랜치를 삭제합니다. 29 | - 기능 이름은 알파벳 소문자, 언더바(`'_'`)로만 이루어진 명령문 형태의 영어 문장으로 되어 있어야 합니다. 문장은 최대한 간결하게 만듭니다. 예를 들어, `encoding` 클래스를 추가하는 작업을 할 예정이라면, 브랜치 이름을 `"feature/add_encoding_class"`로 지으면 됩니다. 30 | - 해당 브랜치에는 해당 기능 구현과 관련 없는 작업은 하지 마십시오. 31 | - 해당 브랜치에는 예외적으로 이슈와 PR을 넣을 수 있습니다. 32 | - 버그를 수정할 때, 작업이 오래 걸릴 것으로 보인다면 `"bugfix/(버그 이름)"`의 이름을 가진 브랜치를 master 브랜치에서 분기시킬 수 있습니다. 작업이 끝난 후에는 다시 master 브랜치로 병합시켜야 합니다. 병합을 한 후에는 브랜치를 삭제합니다. 33 | - 버그 이름은 알파벳 소문자, 언더바(`'_'`)로만 이루어진 명령문 형태의 영어 문장으로 되어 있어야 합니다. 문장은 최대한 간결하게 만듭니다. 예를 들어, 리눅스에서 문자 입력이 되지 않는 버그를 수정할 예정이라면, 브랜치 이름을 `"bugfix/cannot_read_character"`로 지으면 됩니다. 34 | - 해당 브랜치에는 예외적으로 이슈와 PR을 넣을 수 있습니다. 35 | - 이미 출시된 릴리즈에 심각한 버그가 있을 경우 master 브랜치에서 stable 또는 pre-release 브랜치로 병합하는 커밋(안정된 릴리즈일 경우 stable 브랜치로, 프리릴리즈일 경우 pre-release 브랜치로 병합하는 커밋에서 분기합니다.)에서 `"bugfix/(버그 이름)"`의 이름을 가진 브랜치를 master 브랜치에서 분기한 후, 버그를 수정한 후 master 브랜치 및 stable 또는 pre-release 브랜치(안정된 릴리즈일 경우 stable 브랜치로, 프리릴리즈일 경우 pre-release 브랜치로 병합합니다.)로 병합합니다. 이때, master 브랜치에 먼저 병합을 한 후에 stable 또는 pre-release 브랜치에 병합해야 합니다. 병합을 한 후에는 브랜치를 삭제합니다. 36 | - 단, 프리릴리즈를 포함하여 가장 최신 릴리즈에서 심각한 버그가 발견되었을 경우, master 브랜치에 부 버전을 올려야 하는 기능 구현을 하지 않았다면 브랜치를 분기하지 않고 master 브랜치에서 작업합니다(단, 이 경우에도 버그의 수정이 오래 걸릴 것으로 보이면 브랜치를 분기할 수 있습니다.). 37 | - 해당 브랜치에는 예외적으로 이슈와 PR을 넣을 수 있습니다. 38 | ## 릴리즈 절차 39 | 1. master 브랜치에서 특정 버전에 대한 개발을 완료합니다. 40 | 2. 만약 정식 출시가 가능한 안정화 된 버전일 경우 stable 브랜치로 커밋을 병합합니다. 만약 정식 출시 전 프리릴리즈일 경우 pre-release 브랜치로 커밋을 병합합니다. 41 | 3. 만약 stable 브랜치로 병합했다면, stable 브랜치를 pre-release 브랜치에 병합합니다. master에서 pre-release로 병합하면 안 됩니다. 42 | 4. Release를 작성합니다. 반드시 기존에 작성된 Release 게시글의 형식을 따라 주십시오. 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 kmc7468 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | **개발이 중단되었습니다! 최초로 통로를 제대로 구현한 아희 인터프리터 [톡희](https://github.com/kmc7468/talkheui)는 어떠신가요?** 2 | 3 | [![license](https://img.shields.io/badge/license-MIT-brightgreen.svg)](https://shields.io/) [![language](https://img.shields.io/badge/language-C%2B%2B17-blue.svg)](https://shields.io/) 4 | # 아희++ 5 | 아희와 호환성이 있는 난해한 객체지향 한글 프로그래밍 언어 6 | - 인터프리터 버전: 2.0.0 (불안정한 버전) 7 | - 다른 버전: [1.2.1](https://github.com/kmc7468/Aheuiplusplus/tree/version/1.2.1) 8 | - 아직 개발중인 버전입니다. 9 | 10 | **개발이 중단되었습니다! 
최초로 통로를 제대로 구현한 아희 인터프리터 [톡희](https://github.com/kmc7468/talkheui)는 어떠신가요?** 11 | ## [레퍼런스](https://github.com/kmc7468/Aheuiplusplus/wiki) 12 | 아희++의 표준안과 예제를 수록하고 있습니다. 표준안에 애매한 내용이나 질문이 있다면 이슈 등을 통해 알려주시면 감사하겠습니다. 13 | ## 아희++ 표준 인터프리터의 특징 14 | - **강력한 유니코드 지원**
15 | 코드에 이모지 등의 2개 이상의 코드 포인트로 구성되는 다양한 유니코드 문자를 사용해도 1글자로 정상적으로 인식합니다. 16 | - **범용성**
17 | 아희++은 물론이며, 아희도 인터프리팅 할 수 있습니다. 18 | - **통로 지원**
19 | 아희 구현체 최초로 통로를 *제대로* 지원합니다. C++로 작성된 '아희++ 표준 인터프리터 확장'을 연결하면 통로를 통해 확장과 통신할 수 있습니다. C++을 사용할 수 있다면 누구나 확장을 만들 수 있습니다. 20 | - **높은 이식성**
21 | 아희++ 표준 인터프리터의 모든 소스 코드는 컴파일러 확장 등이 사용되지 않아 C++17 표준을 정상적으로 지원하는 컴파일러 모두에서 정상적으로 컴파일 될 수 있습니다. 22 | ## 컴파일 23 | ### 필요한 소프트웨어 24 | - CMake 3.8.0 이상 25 | - C++17 표준을 정상적으로 지원하는 C++ 컴파일러 26 | ### 컴파일 방법 27 | 1. 이 레포지토리를 로컬에 복제합니다. 28 | 2. 복제된 디렉토리 내부에 있는 CMakeLists.txt 파일을 CMake로 실행합니다. 29 | 3. CMake가 생성한 빌드 스크립트를 적절한 소프트웨어로 실행합니다. 30 | ### Git+Makefile 31 | ``` 32 | $ git clone -b stable https://github.com/kmc7468/Aheuiplusplus.git 33 | $ cd ./Aheuiplusplus 34 | $ cmake CMakeLists.txt 35 | $ make 36 | ``` 37 | `-b stable` 옵션은 릴리즈 중 *정식 버전만* 보았을 때, 가장 최신의 릴리즈의 소스 코드를 복제하도록 하는 옵션입니다. `-b pre-release` 옵션으로 수정할 경우 모든 릴리즈 중 가장 최신의 릴리즈의 소스 코드를 복제하게 되며, 옵션을 삭제할 경우 여기에 릴리즈 되지 않은 소스 코드도 포함해 가장 최신의 소스 코드를 복제하게 됩니다. 옵션을 삭제하는 것은 권장되지 않습니다. 38 | ### CMake 옵션 39 | - `COMPILE_TARGET`:
40 | 아희++ 표준 인터프리터를 실행 파일의 형태로 컴파일 할지, 정적 라이브러리의 형태로 컴파일 할지 설정하는 옵션입니다. 41 | - 값은 `Executable`, `Library` 중 하나이며, 대소문자는 구분되지 않습니다. 전자는 실행 파일, 후자는 정적 라이브러리의 형태를 의미합니다. 42 | - `Executable`은 `E`와 `Exe`로 축약할 수 있으며, `Library`는 `L`과 `Lib`로 축약할 수 있습니다. 축약형 역시 대소문자는 구분되지 않습니다. 43 | - `USE_EXTENSION`:
 44 | 아희++ 표준 인터프리터 확장을 사용할지 설정하는 옵션입니다. 45 | - `PRINT_BENCHMARK`:
46 | 아희++ 표준 인터프리터의 성능을 출력할지 설정하는 옵션입니다. 47 | ## 예제 48 | 더 많은 예제는 레퍼런스에서 확인하실 수 있습니다. 49 | ### [개발자 수다방](https://gist.github.com/RanolP/6ecb4b1030fccad19dc05f3716d6c2c7) by [RanolP](https://gist.github.com/RanolP) 50 | ``` 51 | 개반뭉반붓밪두빥붖빠뭏밠뭉박누망뭏따뿌삭뿌밪붅파투밣뚜타댜뎌뭏뷺다두타두밢두밙뚜빥푸다뿑빠뿌빥분받뚜삽쑤밪불빥두받투밧누 52 | 발꾔바몽나몽망봀타뽀바몽맣본빠몽밤봃싹뫃빠소따뽅빥볼타빠쑺봃밠뽅소두봎뭏또두볻두봃쑵봃붖뽀뿌토붅또투도수소뚜도푸토뭏본뭉 53 | 자두변번뻕떠벌벚멓더떠벓벐더머퍼뻕더뻕벒뻕더벇뻕떠벐번멓서볻퍼두뫃불포두봀뭏뽅뭏뽅투뫃불속뭏볾뚜쏩뭏뽅투뫃뿑노투도분소붋 54 | 수뺝리밪밤따다맣밪타빥밠빥파타반밧나타타삭맣사맣밢타빥맣발다뽅맣속타뽅빥본밦토밦도밞토따도사뫃빠뽀밦도맣속반봇밠뽅삭뫃뿌 55 | 다총통각하만세삼창해멓북번붏멓뚜벖두뻕숙멓붊번붇썩투퍼투너뚜벓수멓두번푸뻕푸터두번불벚두벘뿑벐뿑더뿑벑숮멓투떠붍번뿌떠붐 56 | 방망희됴아하는난로당도너또범토더봆벌토벌토더토너뽀퍼뽅터봇번볻뻐속멓토머볾터포뻕뽅떠뫃더토더토퍼본더뫃뻐속멓봆더도뻕또더 57 | ``` 58 | 출력: `2018.07.11. 개발자 수다방: 텔레그램 에디션 제 1회 개천절 경축!` 59 | ## 외부 라이브러리 60 | 아래에 열거된 외부 라이브러리들은 아희++ 표준 인터프리터를 컴파일 할 때 같이 컴파일 되므로 추가적인 작업이 필요하지 않습니다. 61 | - [u5e](https://github.com/ruoso/u5e)의 커밋 [3b970d5](https://github.com/ruoso/u5e/tree/3b970d5bc251fdef341d039d66c84ec5eaf4cb6a) - 2-clause BSD license 62 | - include/u5e/basic_grapheme_iterator.hpp 파일의 159번 줄 및 170번 줄이 수정되었습니다. (커밋 [abca129](https://github.com/kmc7468/Aheuiplusplus/commit/abca1292fe6c421d835516e00b33d62ae5710200)) 63 | - [UTF8-CPP](https://github.com/nemtrif/utfcpp) 2.3.5 64 | ## 이런 프로젝트는 어떠세요? 65 | - [TemplatedAH](https://github.com/kmc7468/TemplatedAH) - 템플릿 메타 프로그래밍을 이용한 아희 인터프리터 66 | - **[톡희](https://github.com/kmc7468/talkheui) - 새로운 아희 인터프리터**
67 | 최초로 통로를 제대로 구현한 아희 인터프리터! 아희++의 정신적 후속작입니다. 68 | ## 라이선스 69 | 아희++ 표준 인터프리터의 모든 소스 코드는 MIT 라이선스가 적용됩니다. 단, 외부 라이브러리에는 적용되지 않습니다. 70 | ``` 71 | MIT License 72 | 73 | Copyright (c) 2018 kmc7468 74 | 75 | Permission is hereby granted, free of charge, to any person obtaining a copy 76 | of this software and associated documentation files (the "Software"), to deal 77 | in the Software without restriction, including without limitation the rights 78 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 79 | copies of the Software, and to permit persons to whom the Software is 80 | furnished to do so, subject to the following conditions: 81 | 82 | The above copyright notice and this permission notice shall be included in all 83 | copies or substantial portions of the Software. 84 | 85 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 86 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 87 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 88 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 89 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 90 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 91 | SOFTWARE. 
92 | ``` -------------------------------------------------------------------------------- /extlib/UTF8-CPP/include/utf8/checked.h: -------------------------------------------------------------------------------- 1 | // Copyright 2006-2016 Nemanja Trifunovic 2 | 3 | /* 4 | Permission is hereby granted, free of charge, to any person or organization 5 | obtaining a copy of the software and accompanying documentation covered by 6 | this license (the "Software") to use, reproduce, display, distribute, 7 | execute, and transmit the Software, and to prepare derivative works of the 8 | Software, and to permit third-parties to whom the Software is furnished to 9 | do so, all subject to the following: 10 | 11 | The copyright notices in the Software and this entire statement, including 12 | the above license grant, this restriction and the following disclaimer, 13 | must be included in all copies of the Software, in whole or in part, and 14 | all derivative works of the Software, unless such copies or derivative 15 | works are solely in the form of machine-executable object code generated by 16 | a source language processor. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT 21 | SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE 22 | FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, 23 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 24 | DEALINGS IN THE SOFTWARE. 
25 | */ 26 | 27 | 28 | #ifndef UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 29 | #define UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 30 | 31 | #include "core.h" 32 | #include 33 | 34 | namespace utf8 35 | { 36 | // Base for the exceptions that may be thrown from the library 37 | class exception : public ::std::exception { 38 | }; 39 | 40 | // Exceptions that may be thrown from the library functions. 41 | class invalid_code_point : public exception { 42 | uint32_t cp; 43 | public: 44 | invalid_code_point(uint32_t codepoint) : cp(codepoint) {} 45 | virtual const char* what() const throw() { return "Invalid code point"; } 46 | uint32_t code_point() const {return cp;} 47 | }; 48 | 49 | class invalid_utf8 : public exception { 50 | uint8_t u8; 51 | public: 52 | invalid_utf8 (uint8_t u) : u8(u) {} 53 | virtual const char* what() const throw() { return "Invalid UTF-8"; } 54 | uint8_t utf8_octet() const {return u8;} 55 | }; 56 | 57 | class invalid_utf16 : public exception { 58 | uint16_t u16; 59 | public: 60 | invalid_utf16 (uint16_t u) : u16(u) {} 61 | virtual const char* what() const throw() { return "Invalid UTF-16"; } 62 | uint16_t utf16_word() const {return u16;} 63 | }; 64 | 65 | class not_enough_room : public exception { 66 | public: 67 | virtual const char* what() const throw() { return "Not enough space"; } 68 | }; 69 | 70 | /// The library API - functions intended to be called by the users 71 | 72 | template 73 | octet_iterator append(uint32_t cp, octet_iterator result) 74 | { 75 | if (!utf8::internal::is_code_point_valid(cp)) 76 | throw invalid_code_point(cp); 77 | 78 | if (cp < 0x80) // one octet 79 | *(result++) = static_cast(cp); 80 | else if (cp < 0x800) { // two octets 81 | *(result++) = static_cast((cp >> 6) | 0xc0); 82 | *(result++) = static_cast((cp & 0x3f) | 0x80); 83 | } 84 | else if (cp < 0x10000) { // three octets 85 | *(result++) = static_cast((cp >> 12) | 0xe0); 86 | *(result++) = static_cast(((cp >> 6) & 0x3f) | 
0x80); 87 | *(result++) = static_cast((cp & 0x3f) | 0x80); 88 | } 89 | else { // four octets 90 | *(result++) = static_cast((cp >> 18) | 0xf0); 91 | *(result++) = static_cast(((cp >> 12) & 0x3f) | 0x80); 92 | *(result++) = static_cast(((cp >> 6) & 0x3f) | 0x80); 93 | *(result++) = static_cast((cp & 0x3f) | 0x80); 94 | } 95 | return result; 96 | } 97 | 98 | template 99 | output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, uint32_t replacement) 100 | { 101 | while (start != end) { 102 | octet_iterator sequence_start = start; 103 | internal::utf_error err_code = utf8::internal::validate_next(start, end); 104 | switch (err_code) { 105 | case internal::UTF8_OK : 106 | for (octet_iterator it = sequence_start; it != start; ++it) 107 | *out++ = *it; 108 | break; 109 | case internal::NOT_ENOUGH_ROOM: 110 | throw not_enough_room(); 111 | case internal::INVALID_LEAD: 112 | out = utf8::append (replacement, out); 113 | ++start; 114 | break; 115 | case internal::INCOMPLETE_SEQUENCE: 116 | case internal::OVERLONG_SEQUENCE: 117 | case internal::INVALID_CODE_POINT: 118 | out = utf8::append (replacement, out); 119 | ++start; 120 | // just one replacement mark for the sequence 121 | while (start != end && utf8::internal::is_trail(*start)) 122 | ++start; 123 | break; 124 | } 125 | } 126 | return out; 127 | } 128 | 129 | template 130 | inline output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out) 131 | { 132 | static const uint32_t replacement_marker = utf8::internal::mask16(0xfffd); 133 | return utf8::replace_invalid(start, end, out, replacement_marker); 134 | } 135 | 136 | template 137 | uint32_t next(octet_iterator& it, octet_iterator end) 138 | { 139 | uint32_t cp = 0; 140 | internal::utf_error err_code = utf8::internal::validate_next(it, end, cp); 141 | switch (err_code) { 142 | case internal::UTF8_OK : 143 | break; 144 | case internal::NOT_ENOUGH_ROOM : 145 | throw not_enough_room(); 146 | case 
internal::INVALID_LEAD : 147 | case internal::INCOMPLETE_SEQUENCE : 148 | case internal::OVERLONG_SEQUENCE : 149 | throw invalid_utf8(*it); 150 | case internal::INVALID_CODE_POINT : 151 | throw invalid_code_point(cp); 152 | } 153 | return cp; 154 | } 155 | 156 | template 157 | uint32_t peek_next(octet_iterator it, octet_iterator end) 158 | { 159 | return utf8::next(it, end); 160 | } 161 | 162 | template 163 | uint32_t prior(octet_iterator& it, octet_iterator start) 164 | { 165 | // can't do much if it == start 166 | if (it == start) 167 | throw not_enough_room(); 168 | 169 | octet_iterator end = it; 170 | // Go back until we hit either a lead octet or start 171 | while (utf8::internal::is_trail(*(--it))) 172 | if (it == start) 173 | throw invalid_utf8(*it); // error - no lead byte in the sequence 174 | return utf8::peek_next(it, end); 175 | } 176 | 177 | /// Deprecated in versions that include "prior" 178 | template 179 | uint32_t previous(octet_iterator& it, octet_iterator pass_start) 180 | { 181 | octet_iterator end = it; 182 | while (utf8::internal::is_trail(*(--it))) 183 | if (it == pass_start) 184 | throw invalid_utf8(*it); // error - no lead byte in the sequence 185 | octet_iterator temp = it; 186 | return utf8::next(temp, end); 187 | } 188 | 189 | template 190 | void advance (octet_iterator& it, distance_type n, octet_iterator end) 191 | { 192 | for (distance_type i = 0; i < n; ++i) 193 | utf8::next(it, end); 194 | } 195 | 196 | template 197 | typename std::iterator_traits::difference_type 198 | distance (octet_iterator first, octet_iterator last) 199 | { 200 | typename std::iterator_traits::difference_type dist; 201 | for (dist = 0; first < last; ++dist) 202 | utf8::next(first, last); 203 | return dist; 204 | } 205 | 206 | template 207 | octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result) 208 | { 209 | while (start != end) { 210 | uint32_t cp = utf8::internal::mask16(*start++); 211 | // Take care of surrogate pairs 
first 212 | if (utf8::internal::is_lead_surrogate(cp)) { 213 | if (start != end) { 214 | uint32_t trail_surrogate = utf8::internal::mask16(*start++); 215 | if (utf8::internal::is_trail_surrogate(trail_surrogate)) 216 | cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET; 217 | else 218 | throw invalid_utf16(static_cast(trail_surrogate)); 219 | } 220 | else 221 | throw invalid_utf16(static_cast(cp)); 222 | 223 | } 224 | // Lone trail surrogate 225 | else if (utf8::internal::is_trail_surrogate(cp)) 226 | throw invalid_utf16(static_cast(cp)); 227 | 228 | result = utf8::append(cp, result); 229 | } 230 | return result; 231 | } 232 | 233 | template 234 | u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result) 235 | { 236 | while (start < end) { 237 | uint32_t cp = utf8::next(start, end); 238 | if (cp > 0xffff) { //make a surrogate pair 239 | *result++ = static_cast((cp >> 10) + internal::LEAD_OFFSET); 240 | *result++ = static_cast((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN); 241 | } 242 | else 243 | *result++ = static_cast(cp); 244 | } 245 | return result; 246 | } 247 | 248 | template 249 | octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result) 250 | { 251 | while (start != end) 252 | result = utf8::append(*(start++), result); 253 | 254 | return result; 255 | } 256 | 257 | template 258 | u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result) 259 | { 260 | while (start < end) 261 | (*result++) = utf8::next(start, end); 262 | 263 | return result; 264 | } 265 | 266 | // The iterator class 267 | template 268 | class iterator : public std::iterator { 269 | octet_iterator it; 270 | octet_iterator range_start; 271 | octet_iterator range_end; 272 | public: 273 | iterator () {} 274 | explicit iterator (const octet_iterator& octet_it, 275 | const octet_iterator& rangestart, 276 | const octet_iterator& rangeend) : 277 | it(octet_it), range_start(rangestart), 
range_end(rangeend) 278 | { 279 | if (it < range_start || it > range_end) 280 | throw std::out_of_range("Invalid utf-8 iterator position"); 281 | } 282 | // the default "big three" are OK 283 | octet_iterator base () const { return it; } 284 | uint32_t operator * () const 285 | { 286 | octet_iterator temp = it; 287 | return utf8::next(temp, range_end); 288 | } 289 | bool operator == (const iterator& rhs) const 290 | { 291 | if (range_start != rhs.range_start || range_end != rhs.range_end) 292 | throw std::logic_error("Comparing utf-8 iterators defined with different ranges"); 293 | return (it == rhs.it); 294 | } 295 | bool operator != (const iterator& rhs) const 296 | { 297 | return !(operator == (rhs)); 298 | } 299 | iterator& operator ++ () 300 | { 301 | utf8::next(it, range_end); 302 | return *this; 303 | } 304 | iterator operator ++ (int) 305 | { 306 | iterator temp = *this; 307 | utf8::next(it, range_end); 308 | return temp; 309 | } 310 | iterator& operator -- () 311 | { 312 | utf8::prior(it, range_start); 313 | return *this; 314 | } 315 | iterator operator -- (int) 316 | { 317 | iterator temp = *this; 318 | utf8::prior(it, range_start); 319 | return temp; 320 | } 321 | }; // class iterator 322 | 323 | } // namespace utf8 324 | 325 | #endif //header guard 326 | 327 | 328 | -------------------------------------------------------------------------------- /extlib/UTF8-CPP/include/utf8/core.h: -------------------------------------------------------------------------------- 1 | // Copyright 2006 Nemanja Trifunovic 2 | 3 | /* 4 | Permission is hereby granted, free of charge, to any person or organization 5 | obtaining a copy of the software and accompanying documentation covered by 6 | this license (the "Software") to use, reproduce, display, distribute, 7 | execute, and transmit the Software, and to prepare derivative works of the 8 | Software, and to permit third-parties to whom the Software is furnished to 9 | do so, all subject to the following: 10 | 11 | The 
copyright notices in the Software and this entire statement, including 12 | the above license grant, this restriction and the following disclaimer, 13 | must be included in all copies of the Software, in whole or in part, and 14 | all derivative works of the Software, unless such copies or derivative 15 | works are solely in the form of machine-executable object code generated by 16 | a source language processor. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT 21 | SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE 22 | FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, 23 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 24 | DEALINGS IN THE SOFTWARE. 25 | */ 26 | 27 | 28 | #ifndef UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 29 | #define UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 30 | 31 | #include 32 | 33 | namespace utf8 34 | { 35 | // The typedefs for 8-bit, 16-bit and 32-bit unsigned integers 36 | // You may need to change them to match your system. 37 | // These typedefs have the same names as ones from cstdint, or boost/cstdint 38 | typedef unsigned char uint8_t; 39 | typedef unsigned short uint16_t; 40 | typedef unsigned int uint32_t; 41 | 42 | // Helper code - not intended to be directly called by the library users. 
May be changed at any time 43 | namespace internal 44 | { 45 | // Unicode constants 46 | // Leading (high) surrogates: 0xd800 - 0xdbff 47 | // Trailing (low) surrogates: 0xdc00 - 0xdfff 48 | const uint16_t LEAD_SURROGATE_MIN = 0xd800u; 49 | const uint16_t LEAD_SURROGATE_MAX = 0xdbffu; 50 | const uint16_t TRAIL_SURROGATE_MIN = 0xdc00u; 51 | const uint16_t TRAIL_SURROGATE_MAX = 0xdfffu; 52 | const uint16_t LEAD_OFFSET = LEAD_SURROGATE_MIN - (0x10000 >> 10); 53 | const uint32_t SURROGATE_OFFSET = 0x10000u - (LEAD_SURROGATE_MIN << 10) - TRAIL_SURROGATE_MIN; 54 | 55 | // Maximum valid value for a Unicode code point 56 | const uint32_t CODE_POINT_MAX = 0x0010ffffu; 57 | 58 | template 59 | inline uint8_t mask8(octet_type oc) 60 | { 61 | return static_cast(0xff & oc); 62 | } 63 | template 64 | inline uint16_t mask16(u16_type oc) 65 | { 66 | return static_cast(0xffff & oc); 67 | } 68 | template 69 | inline bool is_trail(octet_type oc) 70 | { 71 | return ((utf8::internal::mask8(oc) >> 6) == 0x2); 72 | } 73 | 74 | template 75 | inline bool is_lead_surrogate(u16 cp) 76 | { 77 | return (cp >= LEAD_SURROGATE_MIN && cp <= LEAD_SURROGATE_MAX); 78 | } 79 | 80 | template 81 | inline bool is_trail_surrogate(u16 cp) 82 | { 83 | return (cp >= TRAIL_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX); 84 | } 85 | 86 | template 87 | inline bool is_surrogate(u16 cp) 88 | { 89 | return (cp >= LEAD_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX); 90 | } 91 | 92 | template 93 | inline bool is_code_point_valid(u32 cp) 94 | { 95 | return (cp <= CODE_POINT_MAX && !utf8::internal::is_surrogate(cp)); 96 | } 97 | 98 | template 99 | inline typename std::iterator_traits::difference_type 100 | sequence_length(octet_iterator lead_it) 101 | { 102 | uint8_t lead = utf8::internal::mask8(*lead_it); 103 | if (lead < 0x80) 104 | return 1; 105 | else if ((lead >> 5) == 0x6) 106 | return 2; 107 | else if ((lead >> 4) == 0xe) 108 | return 3; 109 | else if ((lead >> 3) == 0x1e) 110 | return 4; 111 | else 112 | return 0; 
113 | } 114 | 115 | template 116 | inline bool is_overlong_sequence(uint32_t cp, octet_difference_type length) 117 | { 118 | if (cp < 0x80) { 119 | if (length != 1) 120 | return true; 121 | } 122 | else if (cp < 0x800) { 123 | if (length != 2) 124 | return true; 125 | } 126 | else if (cp < 0x10000) { 127 | if (length != 3) 128 | return true; 129 | } 130 | 131 | return false; 132 | } 133 | 134 | enum utf_error {UTF8_OK, NOT_ENOUGH_ROOM, INVALID_LEAD, INCOMPLETE_SEQUENCE, OVERLONG_SEQUENCE, INVALID_CODE_POINT}; 135 | 136 | /// Helper for get_sequence_x 137 | template 138 | utf_error increase_safely(octet_iterator& it, octet_iterator end) 139 | { 140 | if (++it == end) 141 | return NOT_ENOUGH_ROOM; 142 | 143 | if (!utf8::internal::is_trail(*it)) 144 | return INCOMPLETE_SEQUENCE; 145 | 146 | return UTF8_OK; 147 | } 148 | 149 | #define UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(IT, END) {utf_error ret = increase_safely(IT, END); if (ret != UTF8_OK) return ret;} 150 | 151 | /// get_sequence_x functions decode utf-8 sequences of the length x 152 | template 153 | utf_error get_sequence_1(octet_iterator& it, octet_iterator end, uint32_t& code_point) 154 | { 155 | if (it == end) 156 | return NOT_ENOUGH_ROOM; 157 | 158 | code_point = utf8::internal::mask8(*it); 159 | 160 | return UTF8_OK; 161 | } 162 | 163 | template 164 | utf_error get_sequence_2(octet_iterator& it, octet_iterator end, uint32_t& code_point) 165 | { 166 | if (it == end) 167 | return NOT_ENOUGH_ROOM; 168 | 169 | code_point = utf8::internal::mask8(*it); 170 | 171 | UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end) 172 | 173 | code_point = ((code_point << 6) & 0x7ff) + ((*it) & 0x3f); 174 | 175 | return UTF8_OK; 176 | } 177 | 178 | template 179 | utf_error get_sequence_3(octet_iterator& it, octet_iterator end, uint32_t& code_point) 180 | { 181 | if (it == end) 182 | return NOT_ENOUGH_ROOM; 183 | 184 | code_point = utf8::internal::mask8(*it); 185 | 186 | UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end) 187 | 188 | 
code_point = ((code_point << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff); 189 | 190 | UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end) 191 | 192 | code_point += (*it) & 0x3f; 193 | 194 | return UTF8_OK; 195 | } 196 | 197 | template 198 | utf_error get_sequence_4(octet_iterator& it, octet_iterator end, uint32_t& code_point) 199 | { 200 | if (it == end) 201 | return NOT_ENOUGH_ROOM; 202 | 203 | code_point = utf8::internal::mask8(*it); 204 | 205 | UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end) 206 | 207 | code_point = ((code_point << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff); 208 | 209 | UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end) 210 | 211 | code_point += (utf8::internal::mask8(*it) << 6) & 0xfff; 212 | 213 | UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end) 214 | 215 | code_point += (*it) & 0x3f; 216 | 217 | return UTF8_OK; 218 | } 219 | 220 | #undef UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR 221 | 222 | template 223 | utf_error validate_next(octet_iterator& it, octet_iterator end, uint32_t& code_point) 224 | { 225 | if (it == end) 226 | return NOT_ENOUGH_ROOM; 227 | 228 | // Save the original value of it so we can go back in case of failure 229 | // Of course, it does not make much sense with i.e. 
stream iterators 230 | octet_iterator original_it = it; 231 | 232 | uint32_t cp = 0; 233 | // Determine the sequence length based on the lead octet 234 | typedef typename std::iterator_traits::difference_type octet_difference_type; 235 | const octet_difference_type length = utf8::internal::sequence_length(it); 236 | 237 | // Get trail octets and calculate the code point 238 | utf_error err = UTF8_OK; 239 | switch (length) { 240 | case 0: 241 | return INVALID_LEAD; 242 | case 1: 243 | err = utf8::internal::get_sequence_1(it, end, cp); 244 | break; 245 | case 2: 246 | err = utf8::internal::get_sequence_2(it, end, cp); 247 | break; 248 | case 3: 249 | err = utf8::internal::get_sequence_3(it, end, cp); 250 | break; 251 | case 4: 252 | err = utf8::internal::get_sequence_4(it, end, cp); 253 | break; 254 | } 255 | 256 | if (err == UTF8_OK) { 257 | // Decoding succeeded. Now, security checks... 258 | if (utf8::internal::is_code_point_valid(cp)) { 259 | if (!utf8::internal::is_overlong_sequence(cp, length)){ 260 | // Passed! Return here. 
261 | code_point = cp; 262 | ++it; 263 | return UTF8_OK; 264 | } 265 | else 266 | err = OVERLONG_SEQUENCE; 267 | } 268 | else 269 | err = INVALID_CODE_POINT; 270 | } 271 | 272 | // Failure branch - restore the original value of the iterator 273 | it = original_it; 274 | return err; 275 | } 276 | 277 | template 278 | inline utf_error validate_next(octet_iterator& it, octet_iterator end) { 279 | uint32_t ignored; 280 | return utf8::internal::validate_next(it, end, ignored); 281 | } 282 | 283 | } // namespace internal 284 | 285 | /// The library API - functions intended to be called by the users 286 | 287 | // Byte order mark 288 | const uint8_t bom[] = {0xef, 0xbb, 0xbf}; 289 | 290 | template 291 | octet_iterator find_invalid(octet_iterator start, octet_iterator end) 292 | { 293 | octet_iterator result = start; 294 | while (result != end) { 295 | utf8::internal::utf_error err_code = utf8::internal::validate_next(result, end); 296 | if (err_code != internal::UTF8_OK) 297 | return result; 298 | } 299 | return result; 300 | } 301 | 302 | template 303 | inline bool is_valid(octet_iterator start, octet_iterator end) 304 | { 305 | return (utf8::find_invalid(start, end) == end); 306 | } 307 | 308 | template 309 | inline bool starts_with_bom (octet_iterator it, octet_iterator end) 310 | { 311 | return ( 312 | ((it != end) && (utf8::internal::mask8(*it++)) == bom[0]) && 313 | ((it != end) && (utf8::internal::mask8(*it++)) == bom[1]) && 314 | ((it != end) && (utf8::internal::mask8(*it)) == bom[2]) 315 | ); 316 | } 317 | 318 | //Deprecated in release 2.3 319 | template 320 | inline bool is_bom (octet_iterator it) 321 | { 322 | return ( 323 | (utf8::internal::mask8(*it++)) == bom[0] && 324 | (utf8::internal::mask8(*it++)) == bom[1] && 325 | (utf8::internal::mask8(*it)) == bom[2] 326 | ); 327 | } 328 | } // namespace utf8 329 | 330 | #endif // header guard 331 | 332 | 333 | -------------------------------------------------------------------------------- 
/extlib/UTF8-CPP/include/utf8/unchecked.h: -------------------------------------------------------------------------------- 1 | // Copyright 2006 Nemanja Trifunovic 2 | 3 | /* 4 | Permission is hereby granted, free of charge, to any person or organization 5 | obtaining a copy of the software and accompanying documentation covered by 6 | this license (the "Software") to use, reproduce, display, distribute, 7 | execute, and transmit the Software, and to prepare derivative works of the 8 | Software, and to permit third-parties to whom the Software is furnished to 9 | do so, all subject to the following: 10 | 11 | The copyright notices in the Software and this entire statement, including 12 | the above license grant, this restriction and the following disclaimer, 13 | must be included in all copies of the Software, in whole or in part, and 14 | all derivative works of the Software, unless such copies or derivative 15 | works are solely in the form of machine-executable object code generated by 16 | a source language processor. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT 21 | SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE 22 | FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, 23 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 24 | DEALINGS IN THE SOFTWARE. 
25 | */ 26 | 27 | 28 | #ifndef UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 29 | #define UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 30 | 31 | #include "core.h" 32 | 33 | namespace utf8 34 | { 35 | namespace unchecked 36 | { 37 | template 38 | octet_iterator append(uint32_t cp, octet_iterator result) 39 | { 40 | if (cp < 0x80) // one octet 41 | *(result++) = static_cast(cp); 42 | else if (cp < 0x800) { // two octets 43 | *(result++) = static_cast((cp >> 6) | 0xc0); 44 | *(result++) = static_cast((cp & 0x3f) | 0x80); 45 | } 46 | else if (cp < 0x10000) { // three octets 47 | *(result++) = static_cast((cp >> 12) | 0xe0); 48 | *(result++) = static_cast(((cp >> 6) & 0x3f) | 0x80); 49 | *(result++) = static_cast((cp & 0x3f) | 0x80); 50 | } 51 | else { // four octets 52 | *(result++) = static_cast((cp >> 18) | 0xf0); 53 | *(result++) = static_cast(((cp >> 12) & 0x3f)| 0x80); 54 | *(result++) = static_cast(((cp >> 6) & 0x3f) | 0x80); 55 | *(result++) = static_cast((cp & 0x3f) | 0x80); 56 | } 57 | return result; 58 | } 59 | 60 | template 61 | uint32_t next(octet_iterator& it) 62 | { 63 | uint32_t cp = utf8::internal::mask8(*it); 64 | typename std::iterator_traits::difference_type length = utf8::internal::sequence_length(it); 65 | switch (length) { 66 | case 1: 67 | break; 68 | case 2: 69 | it++; 70 | cp = ((cp << 6) & 0x7ff) + ((*it) & 0x3f); 71 | break; 72 | case 3: 73 | ++it; 74 | cp = ((cp << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff); 75 | ++it; 76 | cp += (*it) & 0x3f; 77 | break; 78 | case 4: 79 | ++it; 80 | cp = ((cp << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff); 81 | ++it; 82 | cp += (utf8::internal::mask8(*it) << 6) & 0xfff; 83 | ++it; 84 | cp += (*it) & 0x3f; 85 | break; 86 | } 87 | ++it; 88 | return cp; 89 | } 90 | 91 | template 92 | uint32_t peek_next(octet_iterator it) 93 | { 94 | return utf8::unchecked::next(it); 95 | } 96 | 97 | template 98 | uint32_t prior(octet_iterator& it) 99 | { 
100 | while (utf8::internal::is_trail(*(--it))) ; 101 | octet_iterator temp = it; 102 | return utf8::unchecked::next(temp); 103 | } 104 | 105 | // Deprecated in versions that include prior, but only for the sake of consistency (see utf8::previous) 106 | template 107 | inline uint32_t previous(octet_iterator& it) 108 | { 109 | return utf8::unchecked::prior(it); 110 | } 111 | 112 | template 113 | void advance (octet_iterator& it, distance_type n) 114 | { 115 | for (distance_type i = 0; i < n; ++i) 116 | utf8::unchecked::next(it); 117 | } 118 | 119 | template 120 | typename std::iterator_traits::difference_type 121 | distance (octet_iterator first, octet_iterator last) 122 | { 123 | typename std::iterator_traits::difference_type dist; 124 | for (dist = 0; first < last; ++dist) 125 | utf8::unchecked::next(first); 126 | return dist; 127 | } 128 | 129 | template 130 | octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result) 131 | { 132 | while (start != end) { 133 | uint32_t cp = utf8::internal::mask16(*start++); 134 | // Take care of surrogate pairs first 135 | if (utf8::internal::is_lead_surrogate(cp)) { 136 | uint32_t trail_surrogate = utf8::internal::mask16(*start++); 137 | cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET; 138 | } 139 | result = utf8::unchecked::append(cp, result); 140 | } 141 | return result; 142 | } 143 | 144 | template 145 | u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result) 146 | { 147 | while (start < end) { 148 | uint32_t cp = utf8::unchecked::next(start); 149 | if (cp > 0xffff) { //make a surrogate pair 150 | *result++ = static_cast((cp >> 10) + internal::LEAD_OFFSET); 151 | *result++ = static_cast((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN); 152 | } 153 | else 154 | *result++ = static_cast(cp); 155 | } 156 | return result; 157 | } 158 | 159 | template 160 | octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result) 161 
| { 162 | while (start != end) 163 | result = utf8::unchecked::append(*(start++), result); 164 | 165 | return result; 166 | } 167 | 168 | template 169 | u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result) 170 | { 171 | while (start < end) 172 | (*result++) = utf8::unchecked::next(start); 173 | 174 | return result; 175 | } 176 | 177 | // The iterator class 178 | template 179 | class iterator : public std::iterator { 180 | octet_iterator it; 181 | public: 182 | iterator () {} 183 | explicit iterator (const octet_iterator& octet_it): it(octet_it) {} 184 | // the default "big three" are OK 185 | octet_iterator base () const { return it; } 186 | uint32_t operator * () const 187 | { 188 | octet_iterator temp = it; 189 | return utf8::unchecked::next(temp); 190 | } 191 | bool operator == (const iterator& rhs) const 192 | { 193 | return (it == rhs.it); 194 | } 195 | bool operator != (const iterator& rhs) const 196 | { 197 | return !(operator == (rhs)); 198 | } 199 | iterator& operator ++ () 200 | { 201 | ::std::advance(it, utf8::internal::sequence_length(it)); 202 | return *this; 203 | } 204 | iterator operator ++ (int) 205 | { 206 | iterator temp = *this; 207 | ::std::advance(it, utf8::internal::sequence_length(it)); 208 | return temp; 209 | } 210 | iterator& operator -- () 211 | { 212 | utf8::unchecked::prior(it); 213 | return *this; 214 | } 215 | iterator operator -- (int) 216 | { 217 | iterator temp = *this; 218 | utf8::unchecked::prior(it); 219 | return temp; 220 | } 221 | }; // class iterator 222 | 223 | } // namespace utf8::unchecked 224 | } // namespace utf8 225 | 226 | 227 | #endif // header guard 228 | 229 | -------------------------------------------------------------------------------- /extlib/UTF8-CPP/include/utf8/utf8.h: -------------------------------------------------------------------------------- 1 | // Copyright 2006 Nemanja Trifunovic 2 | 3 | /* 4 | Permission is hereby granted, free of charge, to any person or 
organization 5 | obtaining a copy of the software and accompanying documentation covered by 6 | this license (the "Software") to use, reproduce, display, distribute, 7 | execute, and transmit the Software, and to prepare derivative works of the 8 | Software, and to permit third-parties to whom the Software is furnished to 9 | do so, all subject to the following: 10 | 11 | The copyright notices in the Software and this entire statement, including 12 | the above license grant, this restriction and the following disclaimer, 13 | must be included in all copies of the Software, in whole or in part, and 14 | all derivative works of the Software, unless such copies or derivative 15 | works are solely in the form of machine-executable object code generated by 16 | a source language processor. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT 21 | SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE 22 | FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, 23 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 24 | DEALINGS IN THE SOFTWARE. 25 | */ 26 | 27 | 28 | #ifndef UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731 29 | #define UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731 30 | 31 | #include "checked.h" 32 | #include "unchecked.h" 33 | 34 | #endif // header guard 35 | -------------------------------------------------------------------------------- /extlib/u5e/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015, Daniel Ruoso 2 | All rights reserved. 
3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 18 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 21 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 22 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /extlib/u5e/include/u5e/basic_encodedstring.hpp: -------------------------------------------------------------------------------- 1 | #ifndef INCLUDED_U5E_BASIC_ENCODEDSTRING 2 | #define INCLUDED_U5E_BASIC_ENCODEDSTRING 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | namespace u5e { 11 | template class basic_grapheme_iterator; 12 | 13 | /** 14 | * \brief basic encoding support over string-like objects. 
15 | * 16 | * u5e::basic_encodedstring implements encoding support on top of a 17 | * string-like object, it is implemented by simply wrapping the 18 | * native string type in order to provide a customized iterator 19 | * that offers codepoint-by-codepoint access instead of iterating 20 | * over the native type. 21 | * 22 | * \tparam Encoding Text is always represented in a specific 23 | * encoding, there is no such thing as a "natural", or "native" 24 | * representation of text, for that reason, the encoding is a part 25 | * of the type. 26 | * 27 | * \tparam NativeString In order to re-use the string support, 28 | * this will always be implemented as a wrapper around an 29 | * native string-like type. The idea is that the C++ string 30 | * libraries operate on unencoded memory, while the u5e types 31 | * offer a layer on top of that for the purposes of implementing 32 | * unicode in a type-safe way. Note that this applies to any 33 | * 'string-like' object, such as string or string_view. 34 | */ 35 | template 37 | class basic_encodedstring { 38 | public: 39 | //@{ 40 | /** 41 | * Offer an interface such that the size of the thing you're 42 | * iterating over is a codepoint, regardless of the native 43 | * type. 44 | */ 45 | typedef u5e::codepoint_traits traits_type; 46 | typedef u5e::codepoint value_type; 47 | typedef u5e::codepoint_traits::pos_type size_type; 48 | typedef u5e::codepoint_traits::off_type difference_type; 49 | typedef value_type& reference; 50 | typedef const value_type& const_reference; 51 | typedef typename NativeString::pointer pointer; 52 | typedef typename NativeString::const_pointer const_pointer; 53 | //@} 54 | 55 | //@{ 56 | /** 57 | * The Encoding template argument must provide iterator and 58 | * const_iterator member types. Those should iterate over 59 | * codepoints, regardless of the encoding and the native type. 
60 | * 61 | * The iterator and const_iterator member types must be themselves 62 | * templates that take the NativeString type as a template 63 | * argument. 64 | */ 65 | typedef typename Encoding::template iterator 66 | iterator; 67 | typedef typename Encoding::template const_iterator 68 | const_iterator; 69 | //@} 70 | 71 | //@{ 72 | /** 73 | * Delegated to std::reverse_iterator 74 | */ 75 | typedef std::reverse_iterator reverse_iterator; 76 | typedef std::reverse_iterator const_reverse_iterator; 77 | //@} 78 | 79 | /** 80 | * \brief Raw buffer as specified by the native type. 81 | * 82 | * This means that this class is exactly as expensive as whichever 83 | * native type is being used, it also means this class delegates 84 | * all memory management to that native type. 85 | * 86 | * This member is public because you should be able to completely 87 | * manage the native object if you need to. 88 | */ 89 | NativeString native_string; 90 | 91 | /** 92 | * Default constructor, delegated to the native type. 93 | */ 94 | basic_encodedstring() = default; 95 | 96 | /** 97 | * Implicit conversion from the native type. 98 | */ 99 | basic_encodedstring(const NativeString& s) 100 | : native_string(s) { }; 101 | 102 | /** 103 | * Assignment operator: copies the native string of other, returns *this. 104 | */ 105 | basic_encodedstring& 106 | operator= (const basic_encodedstring &other) { 107 | native_string = other.native_string; return *this; 108 | } 109 | 110 | //@{ 111 | /** 112 | * Get begin and end native iterators.
113 | */ 114 | inline typename NativeString::iterator native_begin() { 115 | return native_string.begin(); 116 | } 117 | inline typename NativeString::iterator native_end() { 118 | return native_string.end(); 119 | } 120 | inline typename NativeString::const_iterator native_cbegin() { 121 | return native_string.cbegin(); 122 | } 123 | inline typename NativeString::const_iterator native_cend() { 124 | return native_string.cend(); 125 | } 126 | //@} 127 | 128 | //@{ 129 | /** 130 | * Get begin and end codepoint iterators. 131 | */ 132 | inline iterator codepoint_begin() { 133 | return iterator(native_string.begin()); 134 | } 135 | inline iterator codepoint_end() { 136 | return iterator(native_string.end()); 137 | } 138 | inline const_iterator codepoint_cbegin() { 139 | return const_iterator(native_string.cbegin()); 140 | } 141 | inline const_iterator codepoint_cend() { 142 | return const_iterator(native_string.cend()); 143 | } 144 | //@} 145 | 146 | //@{ 147 | /** 148 | * Get begin and end grapheme iterators. 149 | * Graphemes are always built from the const iterators, since graphemes 150 | * are always immutable. 151 | */ 152 | inline basic_grapheme_iterator grapheme_begin() { 153 | basic_grapheme_iterator i(codepoint_cbegin(), 154 | codepoint_cend()); 155 | return i; 156 | } 157 | inline basic_grapheme_iterator grapheme_end() { 158 | basic_grapheme_iterator i(codepoint_cbegin(), 159 | codepoint_cend(), 160 | codepoint_cend()); 161 | return i; 162 | } 163 | //@} 164 | 165 | //@{ 166 | /** 167 | * Append from input iterators. 168 | * 169 | * Note that this is only possible from iterators of the same 170 | * encoding. This will not perform any conversion. 
171 | */ 172 | template 173 | inline basic_encodedstring& append 174 | ( 175 | typename basic_encodedstring::const_iterator first, 176 | typename basic_encodedstring::const_iterator last 177 | ) { 178 | native_string.append 179 | (Encoding::template native_const_iterator(first), 180 | Encoding::template native_const_iterator(last) 181 | ); 182 | return *this; 183 | } 184 | 185 | inline basic_encodedstring& append 186 | (const_iterator first,const_iterator last 187 | ) { 188 | return append(first,last); 189 | } 190 | 191 | template 192 | inline basic_encodedstring& append 193 | (basic_grapheme_iterator>& first, 194 | basic_grapheme_iterator>& last) 195 | { 196 | native_string.append((*first).codepoint_begin(), 197 | (*last).codepoint_begin()); 198 | return *this; 199 | } 200 | 201 | inline basic_encodedstring& append 202 | (basic_grapheme_iterator& first, 203 | basic_grapheme_iterator& last) { 204 | return append(first, last); 205 | } 206 | 207 | template 208 | inline basic_encodedstring& append_from_utf32ne 209 | ( 210 | typename basic_encodedstring::const_iterator first, 211 | typename basic_encodedstring::const_iterator last 212 | ) { 213 | Encoding::append_from_utf32ne 214 | (utf32ne::template native_const_iterator(first), 215 | utf32ne::template native_const_iterator(last), 216 | native_string); 217 | return *this; 218 | } 219 | //@} 220 | 221 | }; 222 | 223 | } 224 | 225 | #endif 226 | -------------------------------------------------------------------------------- /extlib/u5e/include/u5e/basic_grapheme.hpp: -------------------------------------------------------------------------------- 1 | #ifndef INCLUDED_U5E_BASIC_GRAPHEME 2 | #define INCLUDED_U5E_BASIC_GRAPHEME 3 | 4 | namespace u5e { 5 | /** 6 | * \brief Represents a single grapheme cluster 7 | * 8 | * It works by holding start and end values for an underlying 9 | * encodedstring_view-like object. 10 | * 11 | * \tparam UnderlyingEncodedStringView a basic_encodedstring 12 | * instantiation. 
13 | */ 14 | template 15 | class basic_grapheme { 16 | public: 17 | /** 18 | * const_codepoint_iterator allows you to traverse the codepoints 19 | * inside this grapheme. 20 | */ 21 | typedef typename UnderlyingEncodedStringView::const_iterator 22 | const_codepoint_iterator; 23 | 24 | private: 25 | /** 26 | * represents where the grapheme starts 27 | */ 28 | const_codepoint_iterator d_begin; 29 | 30 | /** 31 | * represents where the grapheme ends 32 | */ 33 | const_codepoint_iterator d_end; 34 | 35 | public: 36 | /** 37 | * Default constructor is only valid if the underlying type allows it 38 | */ 39 | basic_grapheme() {}; 40 | 41 | /** 42 | * Construct it with the iterators representing the begin and the 43 | * end of the grapheme. 44 | */ 45 | basic_grapheme(const_codepoint_iterator b, 46 | const_codepoint_iterator e) 47 | :d_begin(b), d_end(e) {}; 48 | 49 | /** 50 | * get the beginning of the codepoints 51 | */ 52 | const_codepoint_iterator codepoint_begin() { 53 | return d_begin; 54 | } 55 | 56 | /** 57 | * get the end of the codepoints 58 | */ 59 | const_codepoint_iterator codepoint_end() { 60 | return d_end; 61 | } 62 | }; 63 | } 64 | 65 | #endif 66 | -------------------------------------------------------------------------------- /extlib/u5e/include/u5e/basic_grapheme_iterator.hpp: -------------------------------------------------------------------------------- 1 | #ifndef INCLUDED_U5E_BASIC_GRAPHEME_ITERATOR 2 | #define INCLUDED_U5E_BASIC_GRAPHEME_ITERATOR 3 | 4 | #include 5 | #include 6 | 7 | namespace u5e { 8 | 9 | /** 10 | * \brief Iterator that describes full graphemes. 11 | * 12 | * \tparam UnderlyingEncodedStringView the underlying encoded string 13 | * type with an underlying native string-like type. 
14 | */ 15 | template 16 | class basic_grapheme_iterator { 17 | public: 18 | /** 19 | * The type of the underlying encoded iterator 20 | */ 21 | typedef typename UnderlyingEncodedStringView::const_iterator 22 | const_codepoint_iterator; 23 | 24 | /** 25 | * the specific grapheme type for this encoded string view 26 | */ 27 | typedef basic_grapheme grapheme; 28 | typedef grapheme value_type; 29 | 30 | //@{ 31 | /** 32 | * The begin and end iterators for the whole text are necessary for 33 | * bounds check, since the size of graphemes cannot be predicted. 34 | */ 35 | const_codepoint_iterator begin_; 36 | const_codepoint_iterator end_; 37 | //@} 38 | //@{ 39 | /** 40 | * This pair of iterators points to where we are now and where the end 41 | * of the current grapheme is. 42 | */ 43 | const_codepoint_iterator where_; 44 | const_codepoint_iterator end_of_grapheme_; 45 | //@} 46 | 47 | typedef props::grapheme_cluster_break::prop_value_type g_c_b_vt; 48 | 49 | /** 50 | * The unicode standard documents that a grapheme boundary can be 51 | * determined by looking just at two adjacent codepoints.
52 | */ 53 | bool is_grapheme_boundary(codepoint a, codepoint b) { 54 | g_c_b_vt va = props::grapheme_cluster_break::resolve(a); 55 | g_c_b_vt vb = props::grapheme_cluster_break::resolve(b); 56 | 57 | if (va == g_c_b_vt::CR && 58 | vb == g_c_b_vt::LF) { 59 | // GB3 60 | return false; 61 | } else if (va == g_c_b_vt::CR || 62 | va == g_c_b_vt::LF || 63 | va == g_c_b_vt::CONTROL) { 64 | // GB4 65 | return true; 66 | } else if (vb == g_c_b_vt::CR || 67 | vb == g_c_b_vt::LF || 68 | vb == g_c_b_vt::CONTROL) { 69 | // GB5 70 | return true; 71 | } else if (va == g_c_b_vt::L && 72 | (vb == g_c_b_vt::L || 73 | vb == g_c_b_vt::V || 74 | vb == g_c_b_vt::LV || 75 | vb == g_c_b_vt::LVT)) { 76 | // GB6 77 | return false; 78 | } else if ((va == g_c_b_vt::LV || 79 | va == g_c_b_vt::V) && 80 | (vb == g_c_b_vt::V || 81 | vb == g_c_b_vt::T)) { 82 | // GB7 83 | return false; 84 | } else if ((va == g_c_b_vt::LVT || 85 | va == g_c_b_vt::T) && 86 | vb == g_c_b_vt::T) { 87 | // GB8 88 | return false; 89 | } else if (vb == g_c_b_vt::EXTEND || 90 | vb == g_c_b_vt::ZWJ) { 91 | // GB9 92 | return false; 93 | } else if (vb == g_c_b_vt::SPACINGMARK) { 94 | // GB9a 95 | return false; 96 | } else if (va == g_c_b_vt::PREPEND) { 97 | // GB9b 98 | return false; 99 | } else if ( ( (va == g_c_b_vt::E_BASE || 100 | va == g_c_b_vt::E_BASE_GAZ) && 101 | vb == g_c_b_vt::E_MODIFIER) || 102 | ( va == g_c_b_vt::EXTEND && 103 | vb == g_c_b_vt::E_MODIFIER )) { 104 | // GB10 -- that is the interpretation I can make 105 | // of the combination of the fact that you should be able 106 | // to compare only two adjancent characters and the text of 107 | // the standard. 
108 | return false; 109 | } else if (va == g_c_b_vt::ZWJ && 110 | (vb == g_c_b_vt::GLUE_AFTER_ZWJ || 111 | vb == g_c_b_vt::E_BASE_GAZ)) { 112 | // GB11 113 | return false; 114 | } else if (va == g_c_b_vt::REGIONAL_INDICATOR && 115 | vb == g_c_b_vt::REGIONAL_INDICATOR) { 116 | // GB12, GB13 117 | // again, I take the liberty to assume the earlier part of the text 118 | // that says you only need to look at two adjacent characters 119 | return false; 120 | } else { 121 | // GB999 122 | return true; 123 | } 124 | } 125 | 126 | //@{ 127 | /** 128 | * Use the data from the unicode database to find the start and 129 | * end of the current grapheme. 130 | */ 131 | void find_end_of_grapheme() { 132 | // GB2 133 | if (end_of_grapheme_ == end_) 134 | return; 135 | // advance end_of_grapheme_ until it's no longer in the same grapheme 136 | 137 | // GB1 138 | // this always start as where_ == end_of_grapheme_; 139 | codepoint a = *end_of_grapheme_; 140 | end_of_grapheme_++; 141 | 142 | while (1) { 143 | // GB2 144 | if (end_of_grapheme_ == end_) 145 | return; 146 | codepoint b = *end_of_grapheme_; 147 | 148 | if (is_grapheme_boundary(a, b)) { 149 | return; 150 | } 151 | 152 | a = b; 153 | end_of_grapheme_++; 154 | } 155 | } 156 | 157 | void find_start_of_grapheme() { 158 | // GB2 159 | if (where_ == begin_ || where_ == end_) 160 | return; 161 | // rewind where_ until it's no longer in the same grapheme 162 | 163 | // GB1 164 | // every step tests the pair (codepoint before where_, codepoint at 165 | // where_), i.e. in text order, as is_grapheme_boundary(a, b) expects 166 | while (where_ != begin_) { 167 | const_codepoint_iterator copy = where_; 168 | --copy; 169 | codepoint a = *copy; 170 | codepoint b = *where_; 171 | 172 | if (is_grapheme_boundary(a, b)) { 173 | return; 174 | } 175 | 176 | // no boundary in front of where_: the grapheme starts earlier 177 | where_ = copy; 178 | } 179 | // reaching begin_ means the grapheme starts at the 180 | // very beginning of the text 181 | } 182 | //@} 183 | 184 | /** 185 | * \brief start at the beginning of the text 186 | */ 187 | basic_grapheme_iterator(const_codepoint_iterator b, 188 | const_codepoint_iterator e) 189 | :begin_(b),
end_(e), where_(b), end_of_grapheme_(b) { 190 | find_end_of_grapheme(); 191 | }; 192 | 193 | /** 194 | * \brief start at a specific point 195 | * find the start and the end of the grapheme 196 | */ 197 | basic_grapheme_iterator(const_codepoint_iterator b, 198 | const_codepoint_iterator e, 199 | const_codepoint_iterator w) 200 | :begin_(b), end_(e), where_(w), end_of_grapheme_(w) { 201 | find_start_of_grapheme(); 202 | find_end_of_grapheme(); 203 | }; 204 | 205 | /** 206 | * \brief start at a specific point - precalculated 207 | * start and end of grapheme 208 | */ 209 | basic_grapheme_iterator(const_codepoint_iterator b, 210 | const_codepoint_iterator e, 211 | const_codepoint_iterator w, 212 | const_codepoint_iterator we) 213 | :begin_(b), end_(e), where_(w), end_of_grapheme_(we) { 214 | }; 215 | 216 | /** 217 | * \brief copy constructor 218 | */ 219 | basic_grapheme_iterator(const basic_grapheme_iterator& copy) 220 | :begin_(copy.begin_), end_(copy.end_), 221 | where_(copy.where_), end_of_grapheme_(copy.end_of_grapheme_) {} 222 | 223 | /** 224 | * dereference to a grapheme object 225 | */ 226 | grapheme operator*() { 227 | return grapheme(where_, end_of_grapheme_); 228 | } 229 | 230 | //@{ 231 | /** 232 | * advance one grapheme; prefix returns *this by reference, postfix the old value 233 | */ 234 | basic_grapheme_iterator& operator++() { 235 | where_ = end_of_grapheme_; 236 | find_end_of_grapheme(); 237 | return *this; 238 | } 239 | 240 | basic_grapheme_iterator operator++(int) { 241 | basic_grapheme_iterator copy(*this); 242 | ++(*this); 243 | return copy; 244 | } 245 | //@} 246 | 247 | /** 248 | * delegate the comparison to the underlying iterator 249 | */ 250 | bool operator==(const_codepoint_iterator other) { 251 | if (where_ == other) { 252 | return true; 253 | } else { 254 | for (const_codepoint_iterator copy = where_; 255 | copy != end_of_grapheme_; copy++) { 256 | if (copy == other) { 257 | return true; 258 | } 259 | } 260 | return false; 261 | } 262 | } 263 | 264 | /** 265 | * delegate the comparison to the
underlying iterator 266 | */ 267 | bool operator==(basic_grapheme_iterator other) { 268 | if (where_ == end_ && 269 | other == end_) { 270 | return true; 271 | } else { 272 | for (const_codepoint_iterator copy = where_; 273 | copy != end_of_grapheme_; copy++) { 274 | if (other == copy) { 275 | return true; 276 | } 277 | } 278 | return false; 279 | } 280 | } 281 | 282 | /** 283 | * delegate the comparison to the underlying iterator 284 | */ 285 | bool operator!=(basic_grapheme_iterator other) { 286 | return !(*this == other); 287 | } 288 | 289 | /** 290 | * delegate the comparison to the underlying iterator 291 | */ 292 | bool operator!=(const_codepoint_iterator other) 293 | { 294 | return !(*this == other); 295 | } 296 | }; 297 | }; 298 | 299 | #endif 300 | -------------------------------------------------------------------------------- /extlib/u5e/include/u5e/canonical_combining_order.hpp: -------------------------------------------------------------------------------- 1 | #ifndef INCLUDED_U5E_CANONICAL_COMBINING_ORDER 2 | #define INCLUDED_U5E_CANONICAL_COMBINING_ORDER 3 | 4 | #include 5 | 6 | namespace u5e { 7 | 8 | /** 9 | * \brief compare codepoints according to the canonical combining order 10 | * 11 | * This is intended to be used with std::sort on a utf32ne string type. 12 | */ 13 | inline bool canonical_combining_order(int a, int b) { 14 | return 15 | props::canonical_combining_class::resolve(a) 16 | < 17 | props::canonical_combining_class::resolve(b); 18 | } 19 | } 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /extlib/u5e/include/u5e/canonical_composition.hpp: -------------------------------------------------------------------------------- 1 | #ifndef INCLUDED_U5E_CANONICAL_COMPOSITION 2 | #define INCLUDED_U5E_CANONICAL_COMPOSITION 3 | 4 | #include 5 | 6 | namespace u5e { 7 | /** 8 | * \brief performs in-place canonical composition. 
9 | * 10 | * This will return the iterator in the end position after the 11 | * composition. 12 | * 13 | * \tparam StorageType the storage type where to apply it. 14 | * 15 | * Must support codepoint_begin, codepoint_cbegin, codepoint_end, 16 | * codepoint_cend, as well as the member types iterator and 17 | * const_iterator. It is also a requirement that you should be able 18 | * to write to it as you read it, which means that this must only be 19 | * used in utf32 iterators, otherwise the output may race ahead of 20 | * the input. 21 | * 22 | * \param data the object where the canonical composition will be 23 | * performed. 24 | * 25 | * \param count return pointer for how many compositions were performed 26 | */ 27 | template 28 | typename StorageType::iterator 29 | inline canonical_composition(StorageType& data, int* count) { 30 | typename StorageType::iterator oi(data.codepoint_begin()); 31 | typename StorageType::iterator in = oi; 32 | typename StorageType::iterator end(data.codepoint_end()); 33 | 34 | int a, b, c; 35 | while (in != end) { 36 | // 37 | // grab the codepoint in the current input iterator 38 | // 39 | a = *in; 40 | if ((in + 1) == end) { 41 | // 42 | // If this is the last codepoint, it can't be composed, so we 43 | // just push it to the output as-is. 44 | // 45 | *(oi++) = a; 46 | in++; 47 | } else { 48 | // 49 | // look ahead for the next codepoint 50 | // 51 | b = *(in + 1); 52 | if (u5e::props::canonical_composition_mapping::resolve(a, b, &c)) { 53 | // 54 | // If this is a composition, we set it as the current input 55 | // iterator after advancing, because it may still be 56 | // composed more. 57 | // 58 | *(++in) = c; 59 | *count = *count + 1; 60 | } else { 61 | // 62 | // If there is no composition, we set it in the output iterator 63 | // 64 | *(oi++) = a; 65 | // 66 | // And finally advance the input iterator.
67 | // 68 | in++; 69 | } 70 | } 71 | } 72 | 73 | return oi; 74 | }; 75 | } 76 | 77 | #endif 78 | -------------------------------------------------------------------------------- /extlib/u5e/include/u5e/canonical_decomposition.hpp: -------------------------------------------------------------------------------- 1 | #ifndef INCLUDED_U5E_CANONICAL_DECOMPOSITION 2 | #define INCLUDED_U5E_CANONICAL_DECOMPOSITION 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | namespace u5e { 14 | 15 | /** 16 | * \brief Perform codepoint-by-codepoint canonical decomposition 17 | * 18 | * This is one step of the normalization process, you probably want 19 | * to use that instead. 20 | * 21 | * This is meant to be used as an operation for u5e::filter. 22 | * 23 | * 24 | * \tparam OutputStringType the output string type to be used. 25 | * Because this reads data from the database, the returned data is 26 | * utf32ne, so you need an OutputStringType that is compatible with 27 | * that. 28 | * 29 | */ 30 | template 31 | inline int canonical_decomposition(const codepoint input, 32 | OutputStringType& output) { 33 | return codepoint_decomposition 34 | ( input, 35 | output, 36 | props::canonical_decomposition_mapping::resolve ); 37 | } 38 | 39 | } 40 | 41 | #endif 42 | -------------------------------------------------------------------------------- /extlib/u5e/include/u5e/codepoint.hpp: -------------------------------------------------------------------------------- 1 | 2 | #ifndef INCLUDED_U5E_CODEPOINT_HPP 3 | #define INCLUDED_U5E_CODEPOINT_HPP 4 | 5 | #include 6 | 7 | namespace u5e { 8 | /** 9 | * \brief Native representation of a codepoint 10 | * 11 | * Explicit class in order to hijack overloads, such that we only 12 | * build codepoints out of known encodings and we only write to 13 | * encodings out of known codepoints. 14 | */ 15 | class codepoint { 16 | public: 17 | /** 18 | * A codepoint has an integer value type.
19 | */ 20 | codepoint_traits::int_type value; 21 | 22 | /** 23 | * Default constructor, starts as NULL. 24 | */ 25 | constexpr codepoint() : value(0) { }; 26 | 27 | /** 28 | * Implicit constructor from an integer value. 29 | */ 30 | constexpr codepoint(int32_t v) : value(v) { }; 31 | 32 | /** 33 | * Copy constructor. 34 | */ 35 | constexpr codepoint(const codepoint& x) = default; 36 | 37 | /** 38 | * Assignment operator from another codepoint. 39 | */ 40 | constexpr codepoint& operator=(const codepoint& x) = default; 41 | 42 | /** 43 | * Assignment operator from an int. 44 | */ 45 | constexpr codepoint& operator=(int c) { value = c; return *this; }; 46 | 47 | /** 48 | * Override int operator to return the codepoint value. 49 | */ 50 | constexpr operator int() const { return value; }; 51 | }; 52 | 53 | /** 54 | * Compare two codepoints by comparing their values. 55 | */ 56 | constexpr bool operator==(const codepoint& a, const codepoint& b) { return a.value == b.value; }; 57 | 58 | //@{ 59 | /** 60 | * Compare an int to a codepoint by comparing the codepoint's value 61 | * with the integer. 62 | */ 63 | constexpr bool operator==(const codepoint_traits::int_type a, const codepoint& b) { return a == b.value; }; 64 | constexpr bool operator==(const codepoint& a, const codepoint_traits::int_type b) { return a.value == b; }; 65 | //@} 66 | } 67 | 68 | #endif 69 | -------------------------------------------------------------------------------- /extlib/u5e/include/u5e/codepoint_decomposition.hpp: -------------------------------------------------------------------------------- 1 | #ifndef INCLUDED_U5E_CODEPOINT_DECOMPOSITION 2 | #define INCLUDED_U5E_CODEPOINT_DECOMPOSITION 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | namespace u5e { 13 | 14 | /** 15 | * \brief Perform codepoint by codepoint decomposition 16 | * 17 | * This is one step of the normalization process, you probably want 18 | * to use that instead. 
19 | * 20 | * This implements only the logic of dealing with the resolved data, 21 | * the actual database resolution is a template parameter. 22 | * 23 | * This is meant to be used as an operation for u5e::filter. 24 | * 25 | * \tparam PropResolver the function that resolves the input 26 | * codepoint into a sequence of decomposed codepoints. 27 | * 28 | * \tparam OutputStringType the output string type to be used. 29 | * Because this reads data from the database, the returned data is 30 | * utf32ne, so you need an OutputStringType that is compatible with 31 | * that. 32 | * 33 | */ 34 | template 35 | inline int codepoint_decomposition 36 | (const codepoint input, 37 | OutputStringType& output, 38 | PropResolver& resolver) { 39 | int const * mapping = resolver(input); 40 | int const * begin; 41 | int const * end; 42 | int count = 0; 43 | if (mapping == NULL) { 44 | begin = &(input.value); 45 | end = begin; 46 | end++; 47 | count = 1; 48 | } else { 49 | begin = mapping; 50 | end = begin; 51 | while (*end != 0) { 52 | end++; 53 | count++; 54 | } 55 | } 56 | utf32ne_string_view from_database 57 | (std::experimental::basic_string_view(begin, count)); 58 | output.template append 59 | (from_database.codepoint_cbegin(), 60 | from_database.codepoint_cend()); 61 | return count; 62 | } 63 | 64 | } 65 | 66 | #endif 67 | -------------------------------------------------------------------------------- /extlib/u5e/include/u5e/codepoint_traits.hpp: -------------------------------------------------------------------------------- 1 | #ifndef INCLUDED_U5E_CODEPOINT_TRAITS 2 | #define INCLUDED_U5E_CODEPOINT_TRAITS 3 | 4 | #include 5 | 6 | namespace u5e { 7 | /** 8 | * \brief Type information for codepoint 9 | * 10 | * This class exists only to provide an interface similar to that of 11 | * the stream and string types. But it is not truly parameterizable, 12 | * since a codepoint always means the same thing. 
13 | */ 14 | class codepoint_traits { 15 | public: 16 | //@{ 17 | /** 18 | * Basic meta-description of a codepoint 19 | */ 20 | typedef int32_t int_type; 21 | typedef uint32_t pos_type; 22 | typedef int32_t off_type; 23 | //@} 24 | }; 25 | } 26 | 27 | #endif 28 | -------------------------------------------------------------------------------- /extlib/u5e/include/u5e/compatibility_and_canonical_decomposition.hpp: -------------------------------------------------------------------------------- 1 | #ifndef INCLUDED_U5E_COMPATIBILITY_AND_CANONICAL_DECOMPOSITION 2 | #define INCLUDED_U5E_COMPATIBILITY_AND_CANONICAL_DECOMPOSITION 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | namespace u5e { 14 | 15 | /** 16 | * \brief Perform compatibility and canonical decomposition 17 | * 18 | * This is one step of the normalization process, you probably want 19 | * to use that instead. 20 | * 21 | * This is meant to be used as an operation for u5e::filter. 22 | * 23 | * \tparam OutputStringType the output string type to be used. 24 | * Because this reads data from the database, the returned data is 25 | * utf32ne, so you need an OutputStringType that is compatible with 26 | * that. 
27 | * 28 | */ 29 | template 30 | inline int compatibility_and_canonical_decomposition 31 | (const codepoint input, 32 | OutputStringType& output) { 33 | return codepoint_decomposition 34 | ( input, 35 | output, 36 | props::compatibility_and_canonical_decomposition_mapping::resolve ); 37 | } 38 | 39 | } 40 | 41 | #endif 42 | -------------------------------------------------------------------------------- /extlib/u5e/include/u5e/encoding_assertion.hpp: -------------------------------------------------------------------------------- 1 | #ifndef INCLUDED_U5E_ENCODING_ASSERTION 2 | #define INCLUDED_U5E_ENCODING_ASSERTION 3 | 4 | #include 5 | #include 6 | 7 | namespace u5e { 8 | /** 9 | * \brief Assert the encoding matches the native type 10 | * 11 | * Tests that the encoding can be used with the specific 12 | * native string type. 13 | */ 14 | template 15 | class encoding_assertion { 16 | iterator_assertion 17 | _assertion1; 18 | iterator_assertion 19 | _assertion2; 20 | iterator_assertion 21 | _assertion3; 22 | iterator_assertion 23 | _assertion4; 24 | }; 25 | } 26 | 27 | #endif 28 | -------------------------------------------------------------------------------- /extlib/u5e/include/u5e/filter.hpp: -------------------------------------------------------------------------------- 1 | #ifndef INCLUDED_U5E_FILTER 2 | #define INCLUDED_U5E_FILTER 3 | 4 | namespace u5e { 5 | 6 | /** 7 | * \brief Walks an input iterator through a filter 8 | * 9 | * This will go from the begin to the end of the input iterator and 10 | * will execute the filter function once for every input element. 11 | * 12 | * Unlike std::transform, the filter function does not return the 13 | * output element, but it receives the output object and will do 14 | * whatever makes sense with the output object. 15 | * 16 | * That means that the type of filter will define what type of 17 | * object can be used as output. 
The filter function itself will not 18 | * touch the output object, but simply forward it to the operator 19 | * function. 20 | * 21 | * The operator function returns an int that is meant to indicate 22 | * how much output was produced. The filter function will accumulate 23 | * those values and return the sum. 24 | * 25 | * The filter is not required to produce a constant number of 26 | * outputs for each input. The function can produce many outputs 27 | * or even none at all during the processing of each element. 28 | * 29 | * The value type for input and output is not required to be the 30 | * same. The input type is resolved by the value_type member type of 31 | * the input iterator type. 32 | * 33 | * \tparam InputIteratorType the type of the input iterator 34 | * \tparam OutputType the type of the output container 35 | * \tparam Functor the callback function type called for each element 36 | * 37 | * \param input_from starting position for the input iterator 38 | * \param input_to end position for the input iterator 39 | * \param output output container sent to the operator function 40 | * \param operation function that takes the element, the output 41 | * container and returns the number of outputted elements 42 | */ 43 | template 45 | inline int 46 | filter(InputIteratorType input_from, InputIteratorType input_to, 47 | OutputType& output, Functor operation) { 48 | int counter = 0; 49 | while (input_from != input_to) { 50 | counter += operation(*input_from, output); 51 | input_from++; 52 | } 53 | return counter; 54 | } 55 | 56 | } 57 | 58 | #endif 59 | -------------------------------------------------------------------------------- /extlib/u5e/include/u5e/iterator_assertion.hpp: -------------------------------------------------------------------------------- 1 | #ifndef INCLUDED_U5E_ITERATOR_ASSERTION 2 | #define INCLUDED_U5E_ITERATOR_ASSERTION 3 | 4 | #include 5 | 6 | namespace u5e { 7 | /** 8 | * \brief Asserts the iterator is consistently defined 9 | */
10 | template 11 | class iterator_assertion { 12 | typedef typename std::iterator_traits::value_type VT; 13 | static_assert(sizeof(VT)==sizeof(T), 14 | "sizeof value_type incompatible with encoding"); 15 | static_assert(alignof(VT)==alignof(T), 16 | "alignof value_type incompatible with encoding"); 17 | static_assert(std::is_integral::value, 18 | "value_type is not an integral type"); 19 | }; 20 | }; 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /extlib/u5e/include/u5e/normalization_form_c.hpp: -------------------------------------------------------------------------------- 1 | #ifndef INCLUDED_U5E_NORMALIZATION_FORM_C 2 | #define INCLUDED_U5E_NORMALIZATION_FORM_C 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | namespace u5e { 13 | /** 14 | * \brief u5e::filter algorithm for normalizing graphemes 15 | * 16 | * This will work by reading an input grapheme iterator and, 17 | * grapheme by grapheme normalize them in form C. 18 | * 19 | * This will use the unicode database to search for equivalent 20 | * codepoint sequences. 21 | */ 22 | template 24 | inline int normalization_form_c(basic_grapheme grapheme, 25 | OutputStorageType& output) { 26 | 27 | // first step is to decompose the grapheme 28 | utf32ne_string decomposed; 29 | int count = u5e::filter(grapheme.codepoint_begin(), 30 | grapheme.codepoint_end(), 31 | decomposed, 32 | canonical_decomposition); 33 | 34 | // then sort based on canonical combining class 35 | std::sort(decomposed.codepoint_begin(), decomposed.codepoint_end(), 36 | canonical_combining_order); 37 | 38 | // finally recompose. we will do that in-place on the decomposed 39 | // string, since we never have to look back. 
40 | int compositions = 0; 41 | utf32ne_string::iterator oi_begin(decomposed.codepoint_begin()); 42 | utf32ne_string::iterator oi 43 | (u5e::canonical_composition(decomposed,&compositions)); 44 | 45 | // finally append the output 46 | output.template append_from_utf32ne 47 | (oi_begin, oi); 48 | 49 | // we re-use the counter from the decomposition filter and 50 | // subtract how many pair were composed into a single codepoint. 51 | return count - compositions; 52 | } 53 | } 54 | 55 | #endif 56 | -------------------------------------------------------------------------------- /extlib/u5e/include/u5e/normalization_form_d.hpp: -------------------------------------------------------------------------------- 1 | #ifndef INCLUDED_U5E_NORMALIZATION_FORM_D 2 | #define INCLUDED_U5E_NORMALIZATION_FORM_D 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | namespace u5e { 12 | /** 13 | * \brief u5e::filter algorithm for normalizing graphemes 14 | * 15 | * This will work by reading an input grapheme iterator and, 16 | * grapheme by grapheme normalize them in form D. 17 | * 18 | * This will use the unicode database to search for equivalent 19 | * codepoint sequences. 20 | */ 21 | template 23 | inline int normalization_form_d(basic_grapheme grapheme, 24 | OutputStorageType& output) { 25 | 26 | // first step is to decompose the grapheme 27 | utf32ne_string decomposed; 28 | int count = u5e::filter(grapheme.codepoint_begin(), 29 | grapheme.codepoint_end(), 30 | decomposed, 31 | canonical_decomposition); 32 | 33 | // then sort based on canonical combining class 34 | std::sort(decomposed.codepoint_begin(), decomposed.codepoint_end(), 35 | canonical_combining_order); 36 | 37 | // finally append the output 38 | output.template append_from_utf32ne 39 | (decomposed.codepoint_begin(), 40 | decomposed.codepoint_end()); 41 | 42 | // we re-use the counter from the decomposition filter. 
43 | return count; 44 | } 45 | } 46 | 47 | #endif 48 | -------------------------------------------------------------------------------- /extlib/u5e/include/u5e/normalization_form_kc.hpp: -------------------------------------------------------------------------------- 1 | #ifndef INCLUDED_U5E_NORMALIZATION_FORM_KC 2 | #define INCLUDED_U5E_NORMALIZATION_FORM_KC 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | namespace u5e { 13 | /** 14 | * \brief u5e::filter algorithm for normalizing graphemes 15 | * 16 | * This will work by reading an input grapheme iterator and, 17 | * grapheme by grapheme normalize them in form KC. 18 | * 19 | * This will use the unicode database to search for equivalent 20 | * codepoint sequences. 21 | */ 22 | template 24 | inline int normalization_form_kc(basic_grapheme grapheme, 25 | OutputStorageType& output) { 26 | 27 | // first step is to decompose the grapheme 28 | utf32ne_string decomposed; 29 | int count = u5e::filter 30 | (grapheme.codepoint_begin(), 31 | grapheme.codepoint_end(), 32 | decomposed, 33 | compatibility_and_canonical_decomposition); 34 | 35 | // then sort based on canonical combining class 36 | std::sort(decomposed.codepoint_begin(), decomposed.codepoint_end(), 37 | canonical_combining_order); 38 | 39 | // finally recompose. we will do that in-place on the decomposed 40 | // string, since we never have to look back. 41 | int compositions = 0; 42 | utf32ne_string::iterator oi_begin(decomposed.codepoint_begin()); 43 | utf32ne_string::iterator oi 44 | (u5e::canonical_composition(decomposed,&compositions)); 45 | 46 | // finally append the output 47 | output.template append_from_utf32ne 48 | (oi_begin, oi); 49 | 50 | // we re-use the counter from the decomposition filter and 51 | // subtract how many pair were composed into a single codepoint. 
52 | return count - compositions; 53 | } 54 | } 55 | 56 | #endif 57 | -------------------------------------------------------------------------------- /extlib/u5e/include/u5e/normalization_form_kd.hpp: -------------------------------------------------------------------------------- 1 | #ifndef INCLUDED_U5E_NORMALIZATION_FORM_KD 2 | #define INCLUDED_U5E_NORMALIZATION_FORM_KD 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | namespace u5e { 12 | /** 13 | * \brief u5e::filter algorithm for normalizing graphemes 14 | * 15 | * This will work by reading an input grapheme iterator and, 16 | * grapheme by grapheme normalize them in form KD. 17 | * 18 | * This will use the unicode database to search for equivalent 19 | * codepoint sequences. 20 | */ 21 | template 23 | inline int normalization_form_kd(basic_grapheme grapheme, 24 | OutputStorageType& output) { 25 | 26 | // first step is to decompose the grapheme 27 | utf32ne_string decomposed; 28 | int count = u5e::filter 29 | (grapheme.codepoint_begin(), 30 | grapheme.codepoint_end(), 31 | decomposed, 32 | compatibility_and_canonical_decomposition); 33 | 34 | // then sort based on canonical combining class 35 | std::sort(decomposed.codepoint_begin(), decomposed.codepoint_end(), 36 | canonical_combining_order); 37 | 38 | // finally append the output 39 | output.template append_from_utf32ne 40 | (decomposed.codepoint_begin(), 41 | decomposed.codepoint_end()); 42 | 43 | // we re-use the counter from the decomposition filter. 
44 | return count; 45 | } 46 | } 47 | 48 | #endif 49 | -------------------------------------------------------------------------------- /extlib/u5e/include/u5e/props/canonical_combining_class.hpp: -------------------------------------------------------------------------------- 1 | #ifndef INCLUDED_U5E_PROPS_CANONICAL_COMBINING_CLASS 2 | #define INCLUDED_U5E_PROPS_CANONICAL_COMBINING_CLASS 3 | 4 | namespace u5e { 5 | /** 6 | * \brief codepoint property handling 7 | */ 8 | namespace props { 9 | /** 10 | * \brief Canonical_Combining_Class attribute 11 | */ 12 | class canonical_combining_class { 13 | public: 14 | /** 15 | * Return the Canonical_Combining_class for this codepoint 16 | */ 17 | static int resolve(int input); 18 | }; 19 | } 20 | } 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /extlib/u5e/include/u5e/props/canonical_composition_mapping.hpp: -------------------------------------------------------------------------------- 1 | #ifndef INCLUDED_U5E_PROPS_CANONICAL_COMPOSITION_MAPPING 2 | #define INCLUDED_U5E_PROPS_CANONICAL_COMPOSITION_MAPPING 3 | 4 | namespace u5e { 5 | /** 6 | * \brief codepoint property handling 7 | */ 8 | namespace props { 9 | /** 10 | * \brief Derived property for canonical composition 11 | * 12 | * This has the fully resolved canonical composition for 13 | * characters, including the composition exclusions specified in 14 | * the standard. 15 | */ 16 | class canonical_composition_mapping { 17 | public: 18 | /** 19 | * Given a pair of input codepoints a and b returns whether or 20 | * not that pair has a canonical composition. The composed 21 | * codepoint is returned via the r_composed pointer if that is 22 | * the case. 23 | * 24 | * \param a the first codepoint in the decomposed pair 25 | * 26 | * \param b the second codepoint in the decomposed pair 27 | * 28 | * \param r_composed the pointer where the composed codepoint 29 | * will be set if the return is true. 
30 | */ 31 | static bool resolve(int a, int b, int* r_composed); 32 | }; 33 | } 34 | } 35 | 36 | #endif 37 | -------------------------------------------------------------------------------- /extlib/u5e/include/u5e/props/canonical_decomposition_mapping.hpp: -------------------------------------------------------------------------------- 1 | #ifndef INCLUDED_U5E_PROPS_CANONICAL_DECOMPOSITION_MAPPING 2 | #define INCLUDED_U5E_PROPS_CANONICAL_DECOMPOSITION_MAPPING 3 | 4 | namespace u5e { 5 | /** 6 | * \brief codepoint property handling 7 | */ 8 | namespace props { 9 | /** 10 | * \brief Subset of Decomposition_Mapping attribute 11 | * 12 | * This recursively resolves the canonical decomposition mapping. 13 | * The returned data is fully canonically decomposed. 14 | */ 15 | class canonical_decomposition_mapping { 16 | public: 17 | /** 18 | * Perform the decomposition. Returns NULL if the character has 19 | * no decomposition. 20 | * 21 | * The returned int array will be zero terminated. 22 | */ 23 | static int const * const resolve(int input); 24 | }; 25 | } 26 | } 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /extlib/u5e/include/u5e/props/compatibility_and_canonical_decomposition_mapping.hpp: -------------------------------------------------------------------------------- 1 | #ifndef INCLUDED_U5E_PROPS_COMPATIBILITY_AND_CANONICAL_DECOMPOSITION_MAPPING 2 | #define INCLUDED_U5E_PROPS_COMPATIBILITY_AND_CANONICAL_DECOMPOSITION_MAPPING 3 | 4 | namespace u5e { 5 | /** 6 | * \brief codepoint property handling 7 | */ 8 | namespace props { 9 | /** 10 | * \brief Subset of Decomposition_Mapping attribute 11 | * 12 | * This recursively resolves the canonical decomposition mapping. 13 | * The returned data is fully compat and canonically decomposed. 14 | */ 15 | class compatibility_and_canonical_decomposition_mapping { 16 | public: 17 | /** 18 | * Perform the decomposition. 
Returns NULL if the character has 19 | * no decomposition. 20 | * 21 | * The returned int array will be zero terminated. 22 | */ 23 | static int const * const resolve(int input); 24 | }; 25 | } 26 | } 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /extlib/u5e/include/u5e/props/grapheme_cluster_break.hpp: -------------------------------------------------------------------------------- 1 | #ifndef INCLUDED_U5E_PROPS_GRAPHEME_CLUSTER_BREAK 2 | #define INCLUDED_U5E_PROPS_GRAPHEME_CLUSTER_BREAK 3 | 4 | #include 5 | #include 6 | 7 | namespace u5e { 8 | /** 9 | * \brief codepoint property handling 10 | */ 11 | namespace props { 12 | /** 13 | * \brief Grapheme Cluster Break property for a codepoint 14 | */ 15 | class grapheme_cluster_break { 16 | public: 17 | /** 18 | * Possible values for the property as specified by the standard 19 | */ 20 | enum prop_value_type { 21 | OTHER, 22 | PREPEND, 23 | CR, 24 | LF, 25 | CONTROL, 26 | EXTEND, 27 | REGIONAL_INDICATOR, 28 | SPACINGMARK, 29 | L, 30 | V, 31 | T, 32 | LV, 33 | LVT, 34 | E_BASE, 35 | E_MODIFIER, 36 | ZWJ, 37 | GLUE_AFTER_ZWJ, 38 | E_BASE_GAZ, 39 | }; 40 | 41 | /** 42 | * Return the value of the property for the given codepoint by 43 | * looking at the database. 44 | */ 45 | static prop_value_type resolve(codepoint c); 46 | }; 47 | }; 48 | }; 49 | 50 | #endif 51 | -------------------------------------------------------------------------------- /extlib/u5e/include/u5e/utf32ne.hpp: -------------------------------------------------------------------------------- 1 | #ifndef INCLUDED_U5E_UTF32NE 2 | #define INCLUDED_U5E_UTF32NE 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace u5e { 9 | /** 10 | * \brief Architecture-specific type to interface UTF32BE or UTF32LE 11 | * 12 | * utf32ne is not an encoding. It is a type that should be used to 13 | * interface with either UTF32BE or with UTF32LE depending on what 14 | * the native endianness is.
15 | * 16 | * Because utf32 with the native endianness can be used natively, 17 | * there's no special logic and everything is delegated to the 18 | * native types. 19 | */ 20 | class utf32ne { 21 | public: 22 | //@{ 23 | /** 24 | * Delegate to the underlying iterator 25 | */ 26 | template 27 | using iterator = typename NativeString::iterator; 28 | 29 | template 30 | using const_iterator = typename NativeString::const_iterator; 31 | 32 | template 33 | static typename NativeString::const_iterator 34 | native_const_iterator(typename NativeString::const_iterator it) { 35 | return it; 36 | } 37 | 38 | template 39 | static void append_from_utf32ne 40 | (InputNativeIterator first, InputNativeIterator last, 41 | OutputNativeString& output) { 42 | output.append(first, last); 43 | } 44 | 45 | //@} 46 | }; 47 | } 48 | 49 | #endif 50 | -------------------------------------------------------------------------------- /extlib/u5e/include/u5e/utf32ne_string.hpp: -------------------------------------------------------------------------------- 1 | #ifndef INCLUDED_U5E_UTF32NE_STRING 2 | #define INCLUDED_U5E_UTF32NE_STRING 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace u5e { 9 | /** 10 | * \class u5e::utf32ne_string 11 | * \brief Typedef: basic_encodedstring of utf32ne and std::basic_string 12 | * 13 | * Although this is a typedef, it shows up in doxygen as a class for 14 | * better discoverability.
15 | * 16 | * \typedef utf32ne_string 17 | * \brief A basic_encodedstring of utf32ne and std::basic_string 18 | */ 19 | typedef basic_encodedstring> 21 | utf32ne_string; 22 | }; 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /extlib/u5e/include/u5e/utf32ne_string_grapheme.hpp: -------------------------------------------------------------------------------- 1 | #ifndef INCLUDED_U5E_UTF32NE_STRING_GRAPHEME 2 | #define INCLUDED_U5E_UTF32NE_STRING_GRAPHEME 3 | 4 | #include 5 | #include 6 | 7 | namespace u5e { 8 | /** 9 | * \class u5e::utf32ne_string_grapheme 10 | * \brief Typedef: basic_grapheme of utf32ne_string 11 | * 12 | * Although this is a typedef, it shows up in doxygen as a class for 13 | * better discoverability. 14 | * 15 | * \typedef u5e::utf32ne_string_grapheme 16 | * \brief A basic_grapheme of utf32ne_string 17 | */ 18 | typedef basic_grapheme utf32ne_string_grapheme; 19 | }; 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /extlib/u5e/include/u5e/utf32ne_string_grapheme_iterator.hpp: -------------------------------------------------------------------------------- 1 | #ifndef INCLUDED_U5E_UTF32NE_STRING_GRAPHEME_ITERATOR 2 | #define INCLUDED_U5E_UTF32NE_STRING_GRAPHEME_ITERATOR 3 | 4 | #include 5 | #include 6 | 7 | namespace u5e { 8 | /** 9 | * \class u5e::utf32ne_string_grapheme_iterator 10 | * \brief Typedef: basic_grapheme_iterator of utf32ne_string 11 | * 12 | * Although this is a typedef, it shows up in doxygen as a class for 13 | * better discoverability. 
14 | * 15 | * \typedef u5e::utf32ne_string_grapheme_iterator 16 | * \brief A basic_grapheme_iterator of utf32ne_string 17 | */ 18 | typedef basic_grapheme_iterator 19 | utf32ne_string_grapheme_iterator; 20 | }; 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /extlib/u5e/include/u5e/utf32ne_string_view.hpp: -------------------------------------------------------------------------------- 1 | #ifndef INCLUDED_U5E_UTF32NE_STRING_VIEW 2 | #define INCLUDED_U5E_UTF32NE_STRING_VIEW 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace u5e { 9 | /** 10 | * \class u5e::utf32ne_string_view 11 | * \brief Typedef: basic_encodedstring of utf32ne and basic_string_view 12 | * 13 | * Although this is a typedef, it shows up in doxygen as a class for 14 | * better discoverability. 15 | * 16 | * \typedef u5e::utf32ne_string_view 17 | * \brief A basic_encodedstring of utf32ne and basic_string_view 18 | */ 19 | typedef basic_encodedstring> 21 | utf32ne_string_view; 22 | }; 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /extlib/u5e/include/u5e/utf32ne_string_view_grapheme.hpp: -------------------------------------------------------------------------------- 1 | #ifndef INCLUDED_U5E_UTF32NE_STRING_VIEW_GRAPHEME 2 | #define INCLUDED_U5E_UTF32NE_STRING_VIEW_GRAPHEME 3 | 4 | #include 5 | #include 6 | 7 | namespace u5e { 8 | /** 9 | * \class u5e::utf32ne_string_view_grapheme 10 | * \brief Typedef: basic_grapheme of utf32ne_string_view 11 | * 12 | * Although this is a typedef, it shows up in doxygen as a class for 13 | * better discoverability. 
14 | * 15 | * \typedef u5e::utf32ne_string_view_grapheme 16 | * \brief A basic_grapheme of utf32ne_string_view 17 | */ 18 | typedef basic_grapheme utf32ne_string_view_grapheme; 19 | }; 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /extlib/u5e/include/u5e/utf32ne_string_view_grapheme_iterator.hpp: -------------------------------------------------------------------------------- 1 | #ifndef INCLUDED_U5E_UTF32NE_STRING_VIEW_GRAPHEME_ITERATOR 2 | #define INCLUDED_U5E_UTF32NE_STRING_VIEW_GRAPHEME_ITERATOR 3 | 4 | #include 5 | #include 6 | 7 | namespace u5e { 8 | /** 9 | * \class u5e::utf32ne_string_view_grapheme_iterator 10 | * \brief Typedef: basic_grapheme_iterator of utf32ne_string_view 11 | * 12 | * Although this is a typedef, it shows up in doxygen as a class for 13 | * better discoverability. 14 | * 15 | * \typedef u5e::utf32ne_string_view_grapheme_iterator 16 | * \brief A basic_grapheme_iterator of utf32ne_string_view 17 | */ 18 | typedef basic_grapheme_iterator 19 | utf32ne_string_view_grapheme_iterator; 20 | }; 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /extlib/u5e/include/u5e/utf8.hpp: -------------------------------------------------------------------------------- 1 | #ifndef INCLUDED_U5E_UTF8 2 | #define INCLUDED_U5E_UTF8 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | 12 | namespace u5e { 13 | /** 14 | * \brief Encoding type for UTF8 text. 15 | * Unlike UTF16 and UTF32, UTF8 is endian independent. 16 | */ 17 | class utf8 { 18 | public: 19 | 20 | /** 21 | * Delegated to utf8_iterator of the native type. 
22 | * \tparam NativeString the native string type with utf8 data 23 | */ 24 | template 25 | using iterator = 26 | utf8_iterator; 27 | 28 | /** 29 | * Delegated to utf8_const_iterator of the native type 30 | * \tparam NativeString the native string type with utf8 data 31 | */ 32 | template 33 | using const_iterator = 34 | utf8_const_iterator; 35 | 36 | /** 37 | * Get access to the native const_iterator with the native data. 38 | */ 39 | template 40 | static typename NativeString::const_iterator 41 | native_const_iterator 42 | (utf8_const_iterator it) { 43 | it.rewind_to_start_of_codepoint(*(it.raw_iterator_)); 44 | return it.raw_iterator_; 45 | } 46 | 47 | template 48 | static void append_from_utf32ne 49 | (InputNativeIterator first, InputNativeIterator last, 50 | OutputNativeString& output) { 51 | while (first != last) { 52 | codepoint c = *first; 53 | char buf[6] = {}; // utf8 codepoint is never bigger than 6 chars 54 | utf8_iterator o_begin(buf); 55 | utf8_iterator o_i = o_begin; 56 | *o_i = c; 57 | ++o_i; 58 | output.append(o_begin.raw_iterator_, o_i.raw_iterator_); 59 | ++first; 60 | } 61 | } 62 | 63 | }; 64 | } 65 | 66 | #endif 67 | -------------------------------------------------------------------------------- /extlib/u5e/include/u5e/utf8_bounds.hpp: -------------------------------------------------------------------------------- 1 | #ifndef INCLUDED_U5E_UTF8_BOUNDS 2 | #define INCLUDED_U5E_UTF8_BOUNDS 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace u5e { 9 | /** 10 | * \brief Check and enforce bounds of utf8 text 11 | * 12 | * This will only look at the last 6 octets of the text and will 13 | * only look at the first octet. It will not guarantee that the 14 | * entire text is valid. The intent of this class is to provide a 15 | * cheap safety check to make sure you will not have any under or 16 | * overflow when processing this text. 17 | * 18 | * \tparam NativeIterator The native type to be iterated over. 
19 | */ 20 | template 21 | class utf8_bounds { 22 | public: 23 | /** 24 | * The NativeIterator must match the attributes of char 25 | */ 26 | iterator_assertion _assertions; 27 | 28 | /** 29 | * Check the bounds of the utf8 text, returns true if the text has 30 | * correct bounds. 31 | */ 32 | static bool check(NativeIterator begin, NativeIterator end) { 33 | if (utf8_util::is_codepoint_continuation(*begin)) { 34 | return false; 35 | } else { 36 | int max_walkback = 6; // mathematically, it's impossible for 37 | // something more than 6 elements away 38 | // from the end to generate a overflow. 39 | int walkback = 0; 40 | while (walkback < max_walkback && end != begin) { 41 | char octet = *end; 42 | if (utf8_util::is_codepoint_start(octet)) { 43 | if (utf8_util::codepoint_size(octet) > walkback) { 44 | return false; 45 | } 46 | } 47 | --end; walkback++; 48 | } 49 | return true; 50 | } 51 | } 52 | 53 | /** 54 | * Enforce the bounds of the utf8 text, replace any bad character 55 | * in the bounds by '?. Returns false if any substitution was made. 56 | */ 57 | static bool enforce(NativeIterator begin, NativeIterator end) { 58 | bool ret = true; 59 | while (utf8_util::is_codepoint_continuation(*begin)) { 60 | *begin = '?'; 61 | ++begin; 62 | ret = false; 63 | } 64 | int max_walkback = 6; // mathematically, it's impossible for 65 | // something more than 6 elements away 66 | // from the end to generate a overflow. 
67 | int walkback = 0; 68 | while (walkback < max_walkback && end != begin) { 69 | char octet = *end; 70 | if (utf8_util::is_codepoint_start(octet)) { 71 | if (utf8_util::codepoint_size(octet) > walkback) { 72 | *end = '?'; 73 | ret = false; 74 | } 75 | } 76 | --end; walkback++; 77 | } 78 | return ret; 79 | } 80 | }; 81 | } 82 | 83 | #endif 84 | -------------------------------------------------------------------------------- /extlib/u5e/include/u5e/utf8_iterator.hpp: -------------------------------------------------------------------------------- 1 | #ifndef INCLUDED_U5E_UTF8_ITERATOR 2 | #define INCLUDED_U5E_UTF8_ITERATOR 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | namespace u5e { 11 | /** 12 | * \brief Defines the basic inner workings of utf8 iterator 13 | * 14 | * \tparam NativeIterator The underlying type to be iterated over. 15 | */ 16 | template 17 | class utf8_iterator_base { 18 | public: 19 | /** 20 | * The NativeIterator must match the attributes of char 21 | */ 22 | iterator_assertion _assertions; 23 | /** 24 | * This class composes over the NativeIterator 25 | */ 26 | NativeIterator raw_iterator_; 27 | 28 | //@{ 29 | /** 30 | * Basic iterator typedefs 31 | */ 32 | typedef codepoint value_type; 33 | typedef const codepoint& reference; 34 | typedef int difference_type; 35 | typedef std::bidirectional_iterator_tag iterator_category; 36 | //@} 37 | 38 | /** 39 | * Create a iterator from the underlying iterator 40 | */ 41 | inline utf8_iterator_base(const NativeIterator raw_iterator) 42 | : raw_iterator_(raw_iterator) { 43 | }; 44 | 45 | /** 46 | * When doing a reverse itetor, you need to be able to find 47 | * where the current codepoint started. 
48 | */ 49 | inline bool rewind_to_start_of_codepoint(const char current_octet) { 50 | // when we do '*it = codepoint', we will leave the iterator 51 | // halfway into the next character 52 | bool ret = false; 53 | while (utf8_util::is_codepoint_continuation(*raw_iterator_)) { 54 | raw_iterator_--; 55 | ret = true; 56 | } 57 | return ret; 58 | } 59 | 60 | /** 61 | * Advance the iterator to the next codepoint 62 | */ 63 | inline void forward_one_codepoint() { 64 | rewind_to_start_of_codepoint(*raw_iterator_); 65 | difference_type size = utf8_util::codepoint_size(*raw_iterator_); 66 | std::advance(raw_iterator_, size); 67 | } 68 | 69 | /** 70 | * Go to the previous codepoint. 71 | */ 72 | inline void rewind_one_codepoint() { 73 | rewind_to_start_of_codepoint(*raw_iterator_); 74 | raw_iterator_--; 75 | while (utf8_util::is_codepoint_continuation(*raw_iterator_)) { 76 | raw_iterator_--; 77 | } 78 | } 79 | 80 | /** 81 | * Return the codepoint that starts where we are now 82 | */ 83 | const codepoint current_codepoint() { 84 | char first_octet = *raw_iterator_; 85 | if (utf8_util::is_7bit_character(first_octet)) { 86 | return first_octet; 87 | } else { 88 | if (rewind_to_start_of_codepoint(first_octet)) { 89 | first_octet = *raw_iterator_; 90 | } 91 | NativeIterator copy_ = raw_iterator_; 92 | difference_type size = 93 | utf8_util::codepoint_size(first_octet); 94 | unsigned char mask_first_octet = ~(0xFF<<(7-size)); 95 | int value = (first_octet & mask_first_octet); 96 | while (--size) { 97 | value = value<<6 | (*(++copy_) & 0b00111111); 98 | } 99 | return value; 100 | } 101 | } 102 | 103 | }; 104 | 105 | /** 106 | * \brief const iterator for utf8 encoded strings. 107 | * \tparam NativeIterator The underlying type to be iterated over. 
108 | */ 109 | template 110 | class utf8_const_iterator 111 | : public utf8_iterator_base { 112 | public: 113 | /** 114 | * Offers itself as the pointer type 115 | */ 116 | typedef utf8_const_iterator pointer; 117 | 118 | /** 119 | * Create from the underlying iterator type 120 | */ 121 | inline utf8_const_iterator(const NativeIterator raw_iterator) 122 | : utf8_iterator_base(raw_iterator) { }; 123 | 124 | /** 125 | * Copy constructor 126 | */ 127 | inline utf8_const_iterator(const utf8_const_iterator& tocopy) 128 | : utf8_iterator_base(tocopy.raw_iterator_) { }; 129 | 130 | //@{ 131 | /** 132 | * Advance the iterator 133 | */ 134 | inline utf8_const_iterator& operator++() { 135 | this->forward_one_codepoint(); 136 | return *this; 137 | } 138 | 139 | inline utf8_const_iterator operator++(int junk) { 140 | utf8_const_iterator copy(this->raw_iterator_); 141 | ++(*this); 142 | return copy; 143 | } 144 | //@} 145 | 146 | //@{ 147 | /** 148 | * Rewinds the iterator 149 | */ 150 | inline utf8_const_iterator& operator--() { 151 | this->rewind_one_codepoint(); 152 | return *this; 153 | } 154 | 155 | inline utf8_const_iterator operator--(int junk) { 156 | utf8_const_iterator copy(this->raw_iterator_); 157 | --(*this); 158 | return copy; 159 | } 160 | //@} 161 | 162 | //@{ 163 | /** 164 | * Compare with another iterator 165 | */ 166 | inline bool operator==(const utf8_const_iterator& rhs) const { 167 | char c; 168 | utf8_const_iterator copy(*this); 169 | c = *(copy.raw_iterator_); 170 | copy.rewind_to_start_of_codepoint(c); 171 | c = *(copy.raw_iterator_); 172 | int size = utf8_util::codepoint_size(c); 173 | while (size) { 174 | if (copy.raw_iterator_ == rhs.raw_iterator_) { 175 | return true; 176 | } 177 | ++(copy.raw_iterator_); 178 | --size; 179 | } 180 | return false; 181 | } 182 | 183 | inline bool operator!=(const utf8_const_iterator& rhs) const { 184 | return !(*this == rhs); 185 | } 186 | //@} 187 | 188 | /** 189 | * Dereference the current codepoint out of the 
iterator 190 | */ 191 | inline const codepoint operator*() { 192 | return this->current_codepoint(); 193 | } 194 | 195 | }; 196 | 197 | /** 198 | * \brief mutable utf8 iterator 199 | * 200 | * Note that if you set a value in the middle of a text, you will 201 | * likely make the string invalid. Most of the time you should only 202 | * consider appending to an iterator, never writing in the middle of 203 | * the text. 204 | * \tparam NativeIterator The underlying type to be iterated over. 205 | */ 206 | template 207 | class utf8_iterator 208 | : public utf8_iterator_base { 209 | public: 210 | /** 211 | * Offer itself as the pointer type 212 | */ 213 | typedef utf8_iterator pointer; 214 | 215 | /** 216 | * Construct fro the underlying iterator 217 | */ 218 | inline utf8_iterator(const NativeIterator raw_iterator) 219 | : utf8_iterator_base(raw_iterator) {}; 220 | 221 | /** 222 | * Copy constructor 223 | */ 224 | inline utf8_iterator(const utf8_iterator& tocopy) 225 | : utf8_iterator_base(tocopy.raw_iterator_) {}; 226 | 227 | //@{ 228 | /** 229 | * Advance the iterator 230 | */ 231 | inline utf8_iterator& operator++() { 232 | this->forward_one_codepoint(); 233 | return *this; 234 | } 235 | 236 | inline utf8_iterator operator++(int junk) { 237 | utf8_iterator copy(this->raw_iterator_); 238 | ++(*this); 239 | return copy; 240 | } 241 | //@} 242 | 243 | //@{ 244 | /** 245 | * Rewind the iterator 246 | */ 247 | inline utf8_iterator& operator--() { 248 | this->rewind_one_codepoint(); 249 | return *this; 250 | } 251 | 252 | inline utf8_iterator operator--(int junk) { 253 | utf8_iterator copy(this->raw_iterator_); 254 | --(*this); 255 | return copy; 256 | } 257 | //@} 258 | 259 | //@{ 260 | /** 261 | * Compare the iterator with another iterator 262 | */ 263 | inline bool operator==(const utf8_iterator& rhs) const { 264 | char c; 265 | utf8_iterator copy(*this); 266 | c = *(copy.raw_iterator_); 267 | copy.rewind_to_start_of_codepoint(c); 268 | c = *(copy.raw_iterator_); 269 | 
int size = utf8_util::codepoint_size(c); 270 | while (size) { 271 | if (copy.raw_iterator_ == rhs.raw_iterator_) { 272 | return true; 273 | } 274 | ++(copy.raw_iterator_); 275 | --size; 276 | } 277 | return false; 278 | } 279 | 280 | inline bool operator!=(const utf8_iterator& rhs) const { 281 | return !(*this == rhs); 282 | } 283 | //@} 284 | 285 | /** 286 | * \brief offers write access to the iterator at a given position 287 | * 288 | * This is necessary because operator= can only be done after 289 | * operator* is executed, this wouldn't be necessary if there was 290 | * a dedicated operator for 'assign to the dereference'. 291 | */ 292 | class proxyobject : public codepoint { 293 | private: 294 | /** 295 | * A proxy object refers to an iterator state 296 | */ 297 | utf8_iterator& ref; 298 | public: 299 | 300 | /** 301 | * Create from the iterator 302 | */ 303 | proxyobject(utf8_iterator& refin) 304 | :ref(refin) { 305 | utf8_iterator copy = refin; 306 | value = copy.current_codepoint().value; 307 | }; 308 | 309 | /** 310 | * Assign a codepoint to this position, writing as many octets 311 | * as necessary. Note that if you do this in the middle of a 312 | * string, there is a likely chance that you will render the 313 | * remainder of the string invalid. So it's really only a good 314 | * idea to do this as an "append" operation. 
315 | */ 316 | proxyobject& operator=(const codepoint c) { 317 | int value = c.value; // operate on codepoint as integer 318 | int size = utf8_util::encoded_size(value); 319 | if (size <= 1) { 320 | *(ref.raw_iterator_) = (value & 0xFF); 321 | } else { 322 | unsigned char first_octet = (0xFF<<(8-size)); 323 | first_octet |= ((value>>((size-1)*6)) & 0xFF); 324 | *(ref.raw_iterator_) = first_octet; 325 | while (--size) { 326 | unsigned char octet = 0b10000000; 327 | octet |= ((value>>((size-1)*6)) & 0b00111111); 328 | ref.raw_iterator_++; 329 | *(ref.raw_iterator_) = octet; 330 | } 331 | } 332 | return *this; 333 | } 334 | }; 335 | 336 | /** 337 | * mutable utf8 iterator returns a proxy object in order to allow 338 | * assignment to happen. 339 | */ 340 | inline proxyobject operator*() { 341 | return proxyobject(*this); 342 | } 343 | 344 | }; 345 | }; 346 | 347 | #endif 348 | -------------------------------------------------------------------------------- /extlib/u5e/include/u5e/utf8_string.hpp: -------------------------------------------------------------------------------- 1 | #ifndef INCLUDED_U5E_UTF8_STRING 2 | #define INCLUDED_U5E_UTF8_STRING 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace u5e { 9 | /** 10 | * \class u5e::utf8_string 11 | * \brief Typedef: basic_encodedstring of utf8 and std::string 12 | * 13 | * Although this is a typedef, it shows up in doxygen as a class for 14 | * better discoverability. 
15 | * 16 | * \typedef u5e::utf8_string 17 | * \brief A basic_encodedstring of utf8 and std::string 18 | */ 19 | typedef basic_encodedstring utf8_string; 20 | }; 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /extlib/u5e/include/u5e/utf8_string_grapheme.hpp: -------------------------------------------------------------------------------- 1 | #ifndef INCLUDED_U5E_UTF8_STRING_GRAPHEME 2 | #define INCLUDED_U5E_UTF8_STRING_GRAPHEME 3 | 4 | #include 5 | #include 6 | 7 | namespace u5e { 8 | /** 9 | * \class u5e::utf8_string_grapheme 10 | * \brief Typedef: basic_grapheme of utf8_string 11 | * 12 | * Although this is a typedef, it shows up in doxygen as a class for 13 | * better discoverability. 14 | * 15 | * \typedef u5e::utf8_string_grapheme 16 | * \brief A basic_grapheme of utf8_string 17 | */ 18 | typedef basic_grapheme utf8_string_grapheme; 19 | }; 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /extlib/u5e/include/u5e/utf8_string_grapheme_iterator.hpp: -------------------------------------------------------------------------------- 1 | #ifndef INCLUDED_U5E_UTF8_STRING_GRAPHEME_ITERATOR 2 | #define INCLUDED_U5E_UTF8_STRING_GRAPHEME_ITERATOR 3 | 4 | #include 5 | #include 6 | 7 | namespace u5e { 8 | /** 9 | * \class u5e::utf8_string_grapheme_iterator 10 | * \brief Typedef: basic_grapheme_iterator of utf8_string. 11 | * 12 | * Although this is a typedef, it shows up in doxygen as a class for 13 | * better discoverability. 14 | * 15 | * \typedef u5e::utf8_string_grapheme_iterator 16 | * \brief A basic_grapheme_iterator of utf8_string. 
17 | */ 18 | typedef basic_grapheme_iterator 19 | utf8_string_grapheme_iterator; 20 | }; 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /extlib/u5e/include/u5e/utf8_string_view.hpp: -------------------------------------------------------------------------------- 1 | #ifndef INCLUDED_U5E_UTF8_STRING_VIEW 2 | #define INCLUDED_U5E_UTF8_STRING_VIEW 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | namespace u5e { 10 | /** 11 | * \class u5e::utf8_string_view 12 | * \brief Typedef: basic_encodedstring of utf8 and string_view. 13 | * 14 | * Although this is a typedef, it shows up in doxygen as a class for 15 | * better discoverability. 16 | * 17 | * \typedef u5e::utf8_string_view 18 | * \brief A basic_encodedstring of utf8 and string_view. 19 | */ 20 | typedef basic_encodedstring 21 | utf8_string_view; 22 | }; 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /extlib/u5e/include/u5e/utf8_string_view_grapheme.hpp: -------------------------------------------------------------------------------- 1 | #ifndef INCLUDED_U5E_UTF8_STRING_VIEW_GRAPHEME 2 | #define INCLUDED_U5E_UTF8_STRING_VIEW_GRAPHEME 3 | 4 | #include 5 | #include 6 | 7 | namespace u5e { 8 | /** 9 | * \class u5e::utf8_string_view_grapheme 10 | * \brief Typedef: basic_grapheme of utf8_string_view 11 | * 12 | * Although this is a typedef, it shows up in doxygen as a class for 13 | * better discoverability. 
14 | * 15 | * \typedef u5e::utf8_string_view_grapheme 16 | * \brief A basic_grapheme of utf8_string_view 17 | */ 18 | typedef basic_grapheme utf8_string_view_grapheme; 19 | }; 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /extlib/u5e/include/u5e/utf8_string_view_grapheme_iterator.hpp: -------------------------------------------------------------------------------- 1 | #ifndef INCLUDED_U5E_UTF8_STRING_VIEW_GRAPHEME_ITERATOR 2 | #define INCLUDED_U5E_UTF8_STRING_VIEW_GRAPHEME_ITERATOR 3 | 4 | #include 5 | #include 6 | 7 | namespace u5e { 8 | /** 9 | * \class u5e::utf8_string_view_grapheme_iterator 10 | * \brief Typedef: basic_grapheme_iterator of utf8_string_view. 11 | * 12 | * Although this is a typedef, it shows up in doxygen as a class for 13 | * better discoverability. 14 | * 15 | * \typedef u5e::utf8_string_view_grapheme_iterator 16 | * \brief A basic_grapheme_iterator of utf8_string_view. 17 | */ 18 | typedef basic_grapheme_iterator 19 | utf8_string_view_grapheme_iterator; 20 | }; 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /extlib/u5e/include/u5e/utf8_util.hpp: -------------------------------------------------------------------------------- 1 | #ifndef INCLUDED_U5E_UTF8_UTIL 2 | #define INCLUDED_U5E_UTF8_UTIL 3 | 4 | #include 5 | #include 6 | 7 | namespace u5e { 8 | /** 9 | * \brief Basic operations necessary for implementing utf8 10 | */ 11 | class utf8_util { 12 | public: 13 | 14 | /** 15 | * Check whether or not this is a 7bit character 16 | */ 17 | inline static bool is_7bit_character(const char octet) { 18 | if (octet & 0b10000000) { 19 | return false; 20 | } else { 21 | return true; 22 | } 23 | } 24 | 25 | /** 26 | * Check whether or not this is octet is a codepoint continuation 27 | */ 28 | inline static bool is_codepoint_continuation(const char octet) { 29 | if ((octet & 0b11000000) == 0b10000000) { 30 | return true; 31 | } else { 32 | 
return false; 33 | } 34 | } 35 | 36 | /** 37 | * Check whether or not this is a first octet in a sequence 38 | */ 39 | inline static bool is_codepoint_start(const char octet) { 40 | if ((octet & 0b11000000) == 0b11000000) { 41 | return true; 42 | } else { 43 | return false; 44 | } 45 | } 46 | 47 | /** 48 | * Find the codepoint size given the first utf8 octet 49 | */ 50 | inline static int codepoint_size(const char first_octet) { 51 | // count leading zeros on bitwise negated first octet. for 52 | // single-octet codepoints, this would return 0, so we do 53 | // std::max for 1 for those cases. 54 | return std::max(__builtin_clz(~(first_octet << 24)),1); 55 | } 56 | 57 | /** 58 | * How many octets will this codepoint take 59 | */ 60 | inline static int encoded_size(int value) { 61 | return std::ceil((float)(32 - __builtin_clz(value) - 1) / (float)6); 62 | } 63 | 64 | }; 65 | } 66 | 67 | #endif 68 | -------------------------------------------------------------------------------- /extlib/u5e/include/u5e/version.hpp: -------------------------------------------------------------------------------- 1 | #ifndef U5E_VERSION 2 | #define U5E_VERSION 3 | 4 | /** 5 | * \brief main u5e namespace 6 | */ 7 | namespace u5e { 8 | /** 9 | * \brief introspection over the vesion of the library 10 | * both at compile time and at runtime. 
11 | */ 12 | namespace version { 13 | /** 14 | * \brief namespace with compile-time-constant version declaration 15 | */ 16 | namespace compile_time { 17 | //@{ 18 | /** 19 | * Compile-time version definition 20 | */ 21 | constexpr int major = 0; 22 | constexpr int minor = 0; 23 | constexpr int patch = 0; 24 | //@} 25 | } 26 | /** 27 | * \brief introspection for run-time version declaration 28 | * To test which version of the library are you linking against 29 | */ 30 | class run_time { 31 | public: 32 | //@{ 33 | /** 34 | * \brief Run-time introspection for library version 35 | */ 36 | static const int major; 37 | static const int minor; 38 | static const int patch; 39 | //@} 40 | }; 41 | } 42 | } 43 | 44 | #endif 45 | -------------------------------------------------------------------------------- /extlib/u5e/src/u5e/props/CompositionExclusions.txt: -------------------------------------------------------------------------------- 1 | # CompositionExclusions-9.0.0.txt 2 | # Date: 2016-01-21, 22:00:00 GMT [KW, LI] 3 | # © 2016 Unicode®, Inc. 4 | # For terms of use, see http://www.unicode.org/terms_of_use.html 5 | # 6 | # Unicode Character Database 7 | # For documentation, see http://www.unicode.org/reports/tr44/ 8 | # 9 | # This file lists the characters for the Composition Exclusion Table 10 | # defined in UAX #15, Unicode Normalization Forms. 11 | # 12 | # This file is a normative contributory data file in the 13 | # Unicode Character Database. 14 | # 15 | # For more information, see 16 | # http://www.unicode.org/unicode/reports/tr15/#Primary_Exclusion_List_Table 17 | # 18 | # For a full derivation of composition exclusions, see the derived property 19 | # Full_Composition_Exclusion in DerivedNormalizationProps.txt 20 | # 21 | 22 | # ================================================ 23 | # (1) Script Specifics 24 | # 25 | # This list of characters cannot be derived from the UnicodeData.txt file. 
26 | # ================================================ 27 | 28 | 0958 # DEVANAGARI LETTER QA 29 | 0959 # DEVANAGARI LETTER KHHA 30 | 095A # DEVANAGARI LETTER GHHA 31 | 095B # DEVANAGARI LETTER ZA 32 | 095C # DEVANAGARI LETTER DDDHA 33 | 095D # DEVANAGARI LETTER RHA 34 | 095E # DEVANAGARI LETTER FA 35 | 095F # DEVANAGARI LETTER YYA 36 | 09DC # BENGALI LETTER RRA 37 | 09DD # BENGALI LETTER RHA 38 | 09DF # BENGALI LETTER YYA 39 | 0A33 # GURMUKHI LETTER LLA 40 | 0A36 # GURMUKHI LETTER SHA 41 | 0A59 # GURMUKHI LETTER KHHA 42 | 0A5A # GURMUKHI LETTER GHHA 43 | 0A5B # GURMUKHI LETTER ZA 44 | 0A5E # GURMUKHI LETTER FA 45 | 0B5C # ORIYA LETTER RRA 46 | 0B5D # ORIYA LETTER RHA 47 | 0F43 # TIBETAN LETTER GHA 48 | 0F4D # TIBETAN LETTER DDHA 49 | 0F52 # TIBETAN LETTER DHA 50 | 0F57 # TIBETAN LETTER BHA 51 | 0F5C # TIBETAN LETTER DZHA 52 | 0F69 # TIBETAN LETTER KSSA 53 | 0F76 # TIBETAN VOWEL SIGN VOCALIC R 54 | 0F78 # TIBETAN VOWEL SIGN VOCALIC L 55 | 0F93 # TIBETAN SUBJOINED LETTER GHA 56 | 0F9D # TIBETAN SUBJOINED LETTER DDHA 57 | 0FA2 # TIBETAN SUBJOINED LETTER DHA 58 | 0FA7 # TIBETAN SUBJOINED LETTER BHA 59 | 0FAC # TIBETAN SUBJOINED LETTER DZHA 60 | 0FB9 # TIBETAN SUBJOINED LETTER KSSA 61 | FB1D # HEBREW LETTER YOD WITH HIRIQ 62 | FB1F # HEBREW LIGATURE YIDDISH YOD YOD PATAH 63 | FB2A # HEBREW LETTER SHIN WITH SHIN DOT 64 | FB2B # HEBREW LETTER SHIN WITH SIN DOT 65 | FB2C # HEBREW LETTER SHIN WITH DAGESH AND SHIN DOT 66 | FB2D # HEBREW LETTER SHIN WITH DAGESH AND SIN DOT 67 | FB2E # HEBREW LETTER ALEF WITH PATAH 68 | FB2F # HEBREW LETTER ALEF WITH QAMATS 69 | FB30 # HEBREW LETTER ALEF WITH MAPIQ 70 | FB31 # HEBREW LETTER BET WITH DAGESH 71 | FB32 # HEBREW LETTER GIMEL WITH DAGESH 72 | FB33 # HEBREW LETTER DALET WITH DAGESH 73 | FB34 # HEBREW LETTER HE WITH MAPIQ 74 | FB35 # HEBREW LETTER VAV WITH DAGESH 75 | FB36 # HEBREW LETTER ZAYIN WITH DAGESH 76 | FB38 # HEBREW LETTER TET WITH DAGESH 77 | FB39 # HEBREW LETTER YOD WITH DAGESH 78 | FB3A # HEBREW LETTER FINAL KAF WITH 
DAGESH 79 | FB3B # HEBREW LETTER KAF WITH DAGESH 80 | FB3C # HEBREW LETTER LAMED WITH DAGESH 81 | FB3E # HEBREW LETTER MEM WITH DAGESH 82 | FB40 # HEBREW LETTER NUN WITH DAGESH 83 | FB41 # HEBREW LETTER SAMEKH WITH DAGESH 84 | FB43 # HEBREW LETTER FINAL PE WITH DAGESH 85 | FB44 # HEBREW LETTER PE WITH DAGESH 86 | FB46 # HEBREW LETTER TSADI WITH DAGESH 87 | FB47 # HEBREW LETTER QOF WITH DAGESH 88 | FB48 # HEBREW LETTER RESH WITH DAGESH 89 | FB49 # HEBREW LETTER SHIN WITH DAGESH 90 | FB4A # HEBREW LETTER TAV WITH DAGESH 91 | FB4B # HEBREW LETTER VAV WITH HOLAM 92 | FB4C # HEBREW LETTER BET WITH RAFE 93 | FB4D # HEBREW LETTER KAF WITH RAFE 94 | FB4E # HEBREW LETTER PE WITH RAFE 95 | 96 | # Total code points: 67 97 | 98 | # ================================================ 99 | # (2) Post Composition Version precomposed characters 100 | # 101 | # These characters cannot be derived solely from the UnicodeData.txt file 102 | # in this version of Unicode. 103 | # 104 | # Note that characters added to the standard after the 105 | # Composition Version and which have canonical decomposition mappings 106 | # are not automatically added to this list of Post Composition 107 | # Version precomposed characters. 
108 | # ================================================ 109 | 110 | 2ADC # FORKING 111 | 1D15E # MUSICAL SYMBOL HALF NOTE 112 | 1D15F # MUSICAL SYMBOL QUARTER NOTE 113 | 1D160 # MUSICAL SYMBOL EIGHTH NOTE 114 | 1D161 # MUSICAL SYMBOL SIXTEENTH NOTE 115 | 1D162 # MUSICAL SYMBOL THIRTY-SECOND NOTE 116 | 1D163 # MUSICAL SYMBOL SIXTY-FOURTH NOTE 117 | 1D164 # MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE 118 | 1D1BB # MUSICAL SYMBOL MINIMA 119 | 1D1BC # MUSICAL SYMBOL MINIMA BLACK 120 | 1D1BD # MUSICAL SYMBOL SEMIMINIMA WHITE 121 | 1D1BE # MUSICAL SYMBOL SEMIMINIMA BLACK 122 | 1D1BF # MUSICAL SYMBOL FUSA WHITE 123 | 1D1C0 # MUSICAL SYMBOL FUSA BLACK 124 | 125 | # Total code points: 14 126 | 127 | # ================================================ 128 | # (3) Singleton Decompositions 129 | # 130 | # These characters can be derived from the UnicodeData.txt file 131 | # by including all canonically decomposable characters whose 132 | # canonical decomposition consists of a single character. 133 | # 134 | # These characters are simply quoted here for reference. 
135 | # See also Full_Composition_Exclusion in DerivedNormalizationProps.txt 136 | # ================================================ 137 | 138 | # 0340..0341 [2] COMBINING GRAVE TONE MARK..COMBINING ACUTE TONE MARK 139 | # 0343 COMBINING GREEK KORONIS 140 | # 0374 GREEK NUMERAL SIGN 141 | # 037E GREEK QUESTION MARK 142 | # 0387 GREEK ANO TELEIA 143 | # 1F71 GREEK SMALL LETTER ALPHA WITH OXIA 144 | # 1F73 GREEK SMALL LETTER EPSILON WITH OXIA 145 | # 1F75 GREEK SMALL LETTER ETA WITH OXIA 146 | # 1F77 GREEK SMALL LETTER IOTA WITH OXIA 147 | # 1F79 GREEK SMALL LETTER OMICRON WITH OXIA 148 | # 1F7B GREEK SMALL LETTER UPSILON WITH OXIA 149 | # 1F7D GREEK SMALL LETTER OMEGA WITH OXIA 150 | # 1FBB GREEK CAPITAL LETTER ALPHA WITH OXIA 151 | # 1FBE GREEK PROSGEGRAMMENI 152 | # 1FC9 GREEK CAPITAL LETTER EPSILON WITH OXIA 153 | # 1FCB GREEK CAPITAL LETTER ETA WITH OXIA 154 | # 1FD3 GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA 155 | # 1FDB GREEK CAPITAL LETTER IOTA WITH OXIA 156 | # 1FE3 GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA 157 | # 1FEB GREEK CAPITAL LETTER UPSILON WITH OXIA 158 | # 1FEE..1FEF [2] GREEK DIALYTIKA AND OXIA..GREEK VARIA 159 | # 1FF9 GREEK CAPITAL LETTER OMICRON WITH OXIA 160 | # 1FFB GREEK CAPITAL LETTER OMEGA WITH OXIA 161 | # 1FFD GREEK OXIA 162 | # 2000..2001 [2] EN QUAD..EM QUAD 163 | # 2126 OHM SIGN 164 | # 212A..212B [2] KELVIN SIGN..ANGSTROM SIGN 165 | # 2329 LEFT-POINTING ANGLE BRACKET 166 | # 232A RIGHT-POINTING ANGLE BRACKET 167 | # F900..FA0D [270] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA0D 168 | # FA10 CJK COMPATIBILITY IDEOGRAPH-FA10 169 | # FA12 CJK COMPATIBILITY IDEOGRAPH-FA12 170 | # FA15..FA1E [10] CJK COMPATIBILITY IDEOGRAPH-FA15..CJK COMPATIBILITY IDEOGRAPH-FA1E 171 | # FA20 CJK COMPATIBILITY IDEOGRAPH-FA20 172 | # FA22 CJK COMPATIBILITY IDEOGRAPH-FA22 173 | # FA25..FA26 [2] CJK COMPATIBILITY IDEOGRAPH-FA25..CJK COMPATIBILITY IDEOGRAPH-FA26 174 | # FA2A..FA6D [68] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK 
COMPATIBILITY IDEOGRAPH-FA6D 175 | # FA70..FAD9 [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 176 | # 2F800..2FA1D [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 177 | 178 | # Total code points: 1035 179 | 180 | # ================================================ 181 | # (4) Non-Starter Decompositions 182 | # 183 | # These characters can be derived from the UnicodeData.txt file 184 | # by including each expanding canonical decomposition 185 | # (i.e., those which canonically decompose to a sequence 186 | # of characters instead of a single character), such that: 187 | # 188 | # A. The character is not a Starter. 189 | # 190 | # OR (inclusive) 191 | # 192 | # B. The character's canonical decomposition begins 193 | # with a character that is not a Starter. 194 | # 195 | # Note that a "Starter" is any character with a zero combining class. 196 | # 197 | # These characters are simply quoted here for reference. 198 | # See also Full_Composition_Exclusion in DerivedNormalizationProps.txt 199 | # ================================================ 200 | 201 | # 0344 COMBINING GREEK DIALYTIKA TONOS 202 | # 0F73 TIBETAN VOWEL SIGN II 203 | # 0F75 TIBETAN VOWEL SIGN UU 204 | # 0F81 TIBETAN VOWEL SIGN REVERSED II 205 | 206 | # Total code points: 4 207 | 208 | # EOF 209 | -------------------------------------------------------------------------------- /extlib/u5e/src/u5e/props/canonical_combining_class.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | typedef const std::initializer_list> 8 | data_t; 9 | 10 | static data_t 11 | data = 12 | { 13 | #include "canonical_combining_class_data.hpp" 14 | }; 15 | 16 | static int compare_value_to_row(const void* pkey, 17 | const void* pelem) { 18 | int const * const key = (int const * const)pkey; 19 | int const * const * const ref = (int const * const * const)pelem; 20 | int const 
* const elem = *ref; 21 | if (key[0] < elem[0]) { 22 | return -1; 23 | } else if (key[0] > elem[0]) { 24 | return 1; 25 | } else { 26 | return 0; 27 | } 28 | }; 29 | 30 | namespace u5e { 31 | namespace props { 32 | int canonical_combining_class::resolve(int input) { 33 | int const * const * elem = 34 | (int const * const * const) 35 | bsearch((const void*)&input, (const void*)data.begin(), 36 | data.size(), sizeof(data_t::value_type), 37 | compare_value_to_row); 38 | if (elem == NULL) { 39 | return 0; 40 | } else { 41 | int const * it = *elem; 42 | return it[1]; // first element in array is codepoint itself 43 | } 44 | } 45 | } 46 | } 47 | 48 | -------------------------------------------------------------------------------- /extlib/u5e/src/u5e/props/canonical_combining_class_data.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | use strict; 3 | use warnings; 4 | 5 | # 6 | # extract from UnicodeData.txt the Canonical_Combining_Class attribute. 7 | # 8 | open my $ucd, '<', 'UnicodeData.txt' 9 | or die "Failed to open UnicodeData.txt: $!\n"; 10 | my %d; 11 | while (defined (my $line = <$ucd>)) { 12 | chomp $line; 13 | # we will not decode the hex data and store the codepoints as string. 14 | my ($codepoint, $class) = (split /;/, $line)[0,3]; 15 | # default is 0 16 | next unless $class; 17 | $d{$codepoint} = $class; 18 | } 19 | close $ucd; 20 | 21 | # 22 | # Output the data 23 | # 24 | my @sorted_keys = sort { hex($a) <=> hex($b) } keys %d; 25 | 26 | open my $o, '>', 'canonical_combining_class_data.hpp' 27 | or die $!; 28 | print $o 29 | join(",\n", 30 | map { 31 | "{ ".hex($_).", ".$d{$_}." 
}"; 32 | } @sorted_keys 33 | ); 34 | close $o; 35 | -------------------------------------------------------------------------------- /extlib/u5e/src/u5e/props/canonical_composition_mapping.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | typedef const std::initializer_list> 7 | data_t; 8 | 9 | static data_t 10 | data = 11 | { 12 | #include "canonical_composition_mapping_data.hpp" 13 | }; 14 | 15 | static int compare_value_to_row(const void* pkey, 16 | const void* pelem) { 17 | int const * const key = (int const * const)pkey; 18 | int const * const * const ref = (int const * const * const)pelem; 19 | int const * const elem = *ref; 20 | if (key[0] < elem[0]) { 21 | return -1; 22 | } else if (key[0] > elem[0]) { 23 | return 1; 24 | } else { 25 | if (key[1] < elem[1]) { 26 | return -1; 27 | } else if (key[1] > elem[1]) { 28 | return 1; 29 | } else { 30 | return 0; 31 | } 32 | } 33 | }; 34 | 35 | namespace u5e { 36 | namespace props { 37 | bool canonical_composition_mapping::resolve(int a, int b, int* r_composed) { 38 | int input[3] = { a, b, 0 }; 39 | int const * const * elem = 40 | (int const * const * const) 41 | bsearch((const void*)&input, (const void*)data.begin(), 42 | data.size(), sizeof(data_t::value_type), 43 | compare_value_to_row); 44 | if (elem == NULL) { 45 | return false; 46 | } else { 47 | int const * it = *elem; 48 | *r_composed = it[2]; 49 | return true; 50 | } 51 | } 52 | } 53 | } 54 | 55 | -------------------------------------------------------------------------------- /extlib/u5e/src/u5e/props/canonical_composition_mapping_data.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | use strict; 3 | use warnings; 4 | 5 | # 6 | # extract from UnicodeData.txt the Decomposition_Mapping attribute. 
7 | # But extract it from the perspective of someone trying to compose a 8 | # character instead of decompose it. 9 | # 10 | open my $ucd, '<', 'UnicodeData.txt' 11 | or die "Failed to open UnicodeData.txt: $!\n"; 12 | my %d; 13 | my %ccc; 14 | while (defined (my $line = <$ucd>)) { 15 | chomp $line; 16 | # we will not decode the hex data and store the codepoints as string. 17 | my ($codepoint, $ccc, $decomposition) = (split /;/, $line)[0,3,5]; 18 | 19 | # collect the ccc for every character 20 | $ccc{$codepoint} = $ccc || 0; 21 | 22 | # only canonical composition 23 | next unless $decomposition; 24 | next if $decomposition =~ /^\)) { 43 | chomp $line; 44 | $line =~ s/#.*//; 45 | $line =~ s/\s.+//; 46 | next unless $line; 47 | $excl{$line} = 1; 48 | } 49 | close $excl; 50 | 51 | my @all_decompositions = keys %d; 52 | foreach my $decomposition (@all_decompositions) { 53 | my ($first) = split(/ /, $decomposition); 54 | if ($ccc{$first} != 0) { 55 | # ignore decompositions that start with a character that is not a 56 | # starter. 57 | delete $d{$decomposition}; 58 | } else { 59 | my $composition = $d{$decomposition}; 60 | if (exists $excl{$composition}) { 61 | # characters explicitly excluded from re-composition 62 | delete $d{$decomposition}; 63 | } 64 | } 65 | } 66 | 67 | 68 | # 69 | # Output the data 70 | # 71 | my @sorted_keys = sort 72 | { 73 | ( hex(($a =~ /^([a-fA-F0-9]+)/)[0]) 74 | <=> 75 | hex(($b =~ /^([a-fA-F0-9]+)/)[0]) 76 | ) || 77 | ( hex(($a =~ /\s([a-fA-F0-9]+)/)[0]) 78 | <=> 79 | hex(($b =~ /\s([a-fA-F0-9]+)/)[0]) 80 | ) 81 | } 82 | keys %d; 83 | 84 | open my $o, '>', 'canonical_composition_mapping_data.hpp' 85 | or die $!; 86 | print $o 87 | join(",\n", 88 | map { 89 | my $k = $_; 90 | my ($dc1, $dc2, $dc3) = split / /, $k; 91 | warn "$k has more than two decomposed characters" 92 | if $dc3; 93 | "{ ".join(", ", map { hex($_) } $dc1, $dc2, $d{$k})." 
}"; 94 | } @sorted_keys 95 | ); 96 | close $o; 97 | -------------------------------------------------------------------------------- /extlib/u5e/src/u5e/props/canonical_decomposition_mapping.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | typedef const std::initializer_list> 7 | data_t; 8 | 9 | static data_t 10 | data = 11 | { 12 | #include "canonical_decomposition_mapping_data.hpp" 13 | }; 14 | 15 | static int compare_value_to_row(const void* pkey, 16 | const void* pelem) { 17 | int const * const key = (int const * const)pkey; 18 | int const * const * const ref = (int const * const * const)pelem; 19 | int const * const elem = *ref; 20 | if (key[0] < elem[0]) { 21 | return -1; 22 | } else if (key[0] > elem[0]) { 23 | return 1; 24 | } else { 25 | return 0; 26 | } 27 | }; 28 | 29 | namespace u5e { 30 | namespace props { 31 | int const * const canonical_decomposition_mapping::resolve(int input) { 32 | int const * const * elem = 33 | (int const * const * const) 34 | bsearch((const void*)&input, (const void*)data.begin(), 35 | data.size(), sizeof(data_t::value_type), 36 | compare_value_to_row); 37 | if (elem == NULL) { 38 | return NULL; 39 | } else { 40 | int const * it = *elem; 41 | it++; // first element in array is codepoint itself 42 | return it; 43 | } 44 | } 45 | } 46 | } 47 | 48 | -------------------------------------------------------------------------------- /extlib/u5e/src/u5e/props/canonical_decomposition_mapping_data.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | use strict; 3 | use warnings; 4 | 5 | # 6 | # extract from UnicodeData.txt the Decomposition_Mapping attribute. 7 | # Note that this will already fully recurse into the fully decomposed 8 | # character. It is better to pay this extra space cost in read-only 9 | # data than to pay the extra runtime cost of recursively expanding it. 
10 | # 11 | open my $ucd, '<', 'UnicodeData.txt' 12 | or die "Failed to open UnicodeData.txt: $!\n"; 13 | my %d; 14 | while (defined (my $line = <$ucd>)) { 15 | chomp $line; 16 | # we will not decode the hex data and store the codepoints as string. 17 | my ($codepoint, $decomposition) = (split /;/, $line)[0,5]; 18 | # ignore codepoints that are already decomposed. 19 | next unless $decomposition; 20 | next if $decomposition =~ /^\ hex($b) } keys %d; 52 | 53 | open my $o, '>', 'canonical_decomposition_mapping_data.hpp' 54 | or die $!; 55 | print $o 56 | join(",\n", 57 | map { 58 | my $k = $_; 59 | "{ ".hex($k).", ".join(", ", map { hex($_) } @{$d{$k}}). ", 0 }"; 60 | } @sorted_keys 61 | ); 62 | close $o; 63 | -------------------------------------------------------------------------------- /extlib/u5e/src/u5e/props/compatibility_and_canonical_decomposition_mapping.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | typedef const std::initializer_list> 7 | data_t; 8 | 9 | static data_t 10 | data = 11 | { 12 | #include "compatibility_and_canonical_decomposition_mapping_data.hpp" 13 | }; 14 | 15 | static int compare_value_to_row(const void* pkey, 16 | const void* pelem) { 17 | int const * const key = (int const * const)pkey; 18 | int const * const * const ref = (int const * const * const)pelem; 19 | int const * const elem = *ref; 20 | if (key[0] < elem[0]) { 21 | return -1; 22 | } else if (key[0] > elem[0]) { 23 | return 1; 24 | } else { 25 | return 0; 26 | } 27 | }; 28 | 29 | namespace u5e { 30 | namespace props { 31 | int const * const 32 | compatibility_and_canonical_decomposition_mapping::resolve(int input) { 33 | int const * const * elem = 34 | (int const * const * const) 35 | bsearch((const void*)&input, (const void*)data.begin(), 36 | data.size(), sizeof(data_t::value_type), 37 | compare_value_to_row); 38 | if (elem == NULL) { 39 | return NULL; 40 | } else { 41 | int 
const * it = *elem; 42 | it++; // first element in array is codepoint itself 43 | return it; 44 | } 45 | } 46 | } 47 | } 48 | 49 | -------------------------------------------------------------------------------- /extlib/u5e/src/u5e/props/compatibility_and_canonical_decomposition_mapping_data.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | use strict; 3 | use warnings; 4 | 5 | # 6 | # extract from UnicodeData.txt the Decomposition_Mapping attribute. 7 | # Note that this will already fully recurse into the fully decomposed 8 | # character. It is better to pay this extra space cost in read-only 9 | # data than to pay the extra runtime cost of recursively expanding it. 10 | # 11 | open my $ucd, '<', 'UnicodeData.txt' 12 | or die "Failed to open UnicodeData.txt: $!\n"; 13 | my %d; 14 | while (defined (my $line = <$ucd>)) { 15 | chomp $line; 16 | # we will not decode the hex data and store the codepoints as string. 17 | my ($codepoint, $decomposition) = (split /;/, $line)[0,5]; 18 | # ignore codepoints that are already decomposed. 19 | next unless $decomposition; 20 | # we include the compat decompoistions as well 21 | $decomposition =~ s/^ //; 22 | # but no other 23 | next if $decomposition =~ /^\ hex($b) } keys %d; 55 | 56 | open my $o, '>', 'compatibility_and_canonical_decomposition_mapping_data.hpp' 57 | or die $!; 58 | print $o 59 | join(",\n", 60 | map { 61 | my $k = $_; 62 | "{ ".hex($k).", ".join(", ", map { hex($_) } @{$d{$k}}). 
", 0 }"; 63 | } @sorted_keys 64 | ); 65 | close $o; 66 | -------------------------------------------------------------------------------- /extlib/u5e/src/u5e/props/grapheme_cluster_break.cpp: -------------------------------------------------------------------------------- 1 |  2 | #include 3 | 4 | using u5e::codepoint; 5 | using u5e::props::grapheme_cluster_break; 6 | 7 | typedef struct range_data_st { 8 | codepoint range_start; 9 | codepoint range_end; 10 | grapheme_cluster_break::prop_value_type value; 11 | } range_data; 12 | 13 | static range_data ranges[] = { 14 | // this file is generated from the unicode database 15 | // grapheme_cluster_break is small enough that it makes sense 16 | // to just load it 17 | #include "grapheme_cluster_break_data.hpp" 18 | }; 19 | 20 | static int compare_value_to_range(const void* pkey, 21 | const void* pelem) { 22 | range_data* key = (range_data*)pkey; 23 | range_data* elem = (range_data*)pelem; 24 | if (key->range_start < elem->range_start) { 25 | return -1; 26 | } else if (key->range_start >= elem->range_start && 27 | key->range_start <= elem->range_end) { 28 | return 0; 29 | } else { 30 | return 1; 31 | } 32 | }; 33 | 34 | template 35 | static constexpr size_t countof(T(&)[N]) 36 | { 37 | return N; 38 | } 39 | 40 | grapheme_cluster_break::prop_value_type 41 | grapheme_cluster_break::resolve(codepoint c) { 42 | range_data key = {c, c, prop_value_type::OTHER}; 43 | range_data* elem = (range_data*) 44 | bsearch(&key, ranges, 45 | countof(ranges), sizeof(range_data), 46 | compare_value_to_range); 47 | if (elem) { 48 | return elem->value; 49 | } else { 50 | return grapheme_cluster_break::prop_value_type::OTHER; 51 | } 52 | }; 53 | 54 | -------------------------------------------------------------------------------- /extlib/u5e/src/u5e/props/grapheme_cluster_break_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | cat GraphemeBreakProperty.txt \ 3 | | grep '^[A-F0-9]' \ 4 | 
| perl -pe 's/^([0-9A-F]+)\s/$1..$1/' \ 5 | | perl -pe 's/^([A-F0-9]+)..([A-F0-9]+)\s+;\s(\S+).+$/"{ ".join(", ",hex($1),hex($2),"grapheme_cluster_break::prop_value_type::".uc($3))." },"/e' \ 6 | | sort -k2 -n > grapheme_cluster_break_data.hpp 7 | -------------------------------------------------------------------------------- /extlib/u5e/src/u5e/version.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace u5e { 4 | const int version::run_time::major = 0; 5 | const int version::run_time::minor = 0; 6 | const int version::run_time::patch = 0; 7 | } 8 | -------------------------------------------------------------------------------- /include/Aheuiplusplus/Aheuiplusplus.hpp: -------------------------------------------------------------------------------- 1 | #ifndef AHEUIPLUSPLUS_HEADER_AHEUIPLUSPLUS_HPP 2 | #define AHEUIPLUSPLUS_HEADER_AHEUIPLUSPLUS_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #endif -------------------------------------------------------------------------------- /include/Aheuiplusplus/code.hpp: -------------------------------------------------------------------------------- 1 | #ifndef AHEUIPLUSPLUS_HEADER_CODE_HPP 2 | #define AHEUIPLUSPLUS_HEADER_CODE_HPP 3 | 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | #include 15 | 16 | namespace app 17 | { 18 | using u5e::basic_grapheme; 19 | using grapheme = u5e::basic_grapheme; 20 | 21 | namespace details 22 | { 23 | template 24 | struct is_basic_string 25 | { 26 | static constexpr bool value = false; 27 | }; 28 | template 29 | struct is_basic_string> 30 | { 31 | static constexpr bool value = true; 32 | }; 33 | 34 | template 35 | struct is_basic_string_view 36 | { 37 | static constexpr bool value = false; 38 | }; 39 | template 40 | struct 
is_basic_string_view> 41 | { 42 | static constexpr bool value = true; 43 | }; 44 | } 45 | 46 | template 47 | class basic_code final 48 | { 49 | public: 50 | using string_type = String_; 51 | 52 | public: 53 | basic_code() = default; 54 | basic_code(const String_& codes) 55 | { 56 | parse_codes_(codes); 57 | } 58 | template::value && 60 | details::is_basic_string_view::value) || 61 | (details::is_basic_string_view::value && 62 | details::is_basic_string::value) 63 | >::type> 64 | basic_code(const Code_& code) 65 | { 66 | make_from_code_(code); 67 | } 68 | basic_code(const basic_code& code) 69 | : codes_(code.codes_) 70 | {} 71 | basic_code(basic_code&& code) noexcept 72 | : codes_(std::move(code.codes_)) 73 | {} 74 | ~basic_code() = default; 75 | 76 | public: 77 | basic_code& operator=(const String_& codes) 78 | { 79 | parse_codes_(codes); 80 | return *this; 81 | } 82 | basic_code& operator=(const basic_code& code) 83 | { 84 | codes_ = code.codes_; 85 | return *this; 86 | } 87 | basic_code& operator=(basic_code&& code) noexcept 88 | { 89 | codes_ = std::move(code.codes_); 90 | return *this; 91 | } 92 | bool operator==(const basic_code& code) const = delete; 93 | bool operator!=(const basic_code& code) const = delete; 94 | 95 | public: 96 | bool empty() const noexcept 97 | { 98 | return codes_.empty(); 99 | } 100 | void clear() noexcept 101 | { 102 | codes_.clear(); 103 | codes_grapheme_.clear(); 104 | } 105 | basic_grapheme at(std::size_t x, std::size_t y) const 106 | { 107 | return codes_grapheme_.at(y).at(x); 108 | } 109 | basic_grapheme at(const point& location) const 110 | { 111 | return at(location.x(), location.y()); 112 | } 113 | basic_grapheme at(const cursor& location) const 114 | { 115 | return at(location.x(), location.y()); 116 | } 117 | const std::vector>& at(std::size_t y) const 118 | { 119 | return codes_grapheme_.at(y); 120 | } 121 | const std::vector>& line(std::size_t y) const 122 | { 123 | return at(y); 124 | } 125 | const std::vector>& line(const 
point& location) const 126 | { 127 | return at(location.y()); 128 | } 129 | const std::vector>& line(const cursor& location) const 130 | { 131 | return at(location.y()); 132 | } 133 | 134 | std::size_t width(std::size_t y) const 135 | { 136 | return at(y).size(); 137 | } 138 | std::size_t height(std::size_t x) const 139 | { 140 | for (std::size_t i = codes_grapheme_.size() - 1; i >= 0; --i) 141 | { 142 | if (at(i).size() > x) 143 | { 144 | return i + 1; 145 | } 146 | else if (i == 0) 147 | { 148 | return 0; 149 | } 150 | } 151 | } 152 | 153 | private: 154 | void parse_codes_(const String_& codes) 155 | { 156 | clear(); 157 | 158 | std::size_t pos = 0; 159 | std::size_t line_pos; 160 | 161 | using grapheme_iterator = u5e::basic_grapheme_iterator; 162 | 163 | static constexpr auto cr = 164 | static_cast('\r'); 165 | static constexpr auto lf = 166 | static_cast('\n'); 167 | 168 | while ((line_pos = codes.find(lf, pos)) != String_::npos) 169 | { 170 | String_& line = codes_.emplace_back(codes.substr(pos, line_pos - pos)); 171 | pos = line_pos + 1; 172 | 173 | if (!line.empty() && line.back() == cr) 174 | { 175 | if constexpr (details::is_basic_string_view::value) 176 | { 177 | line.remove_suffix(1); 178 | } 179 | else 180 | { 181 | line.erase(line.end() - 1); 182 | } 183 | } 184 | 185 | const grapheme_iterator end = grapheme_iterator(line.begin(), line.end(), line.end()); 186 | std::vector>& line_grapheme = codes_grapheme_.emplace_back(); 187 | 188 | for (grapheme_iterator iter(line.begin(), line.end()); iter != end; ++iter) 189 | { 190 | line_grapheme.push_back(*iter); 191 | } 192 | } 193 | 194 | if (pos < codes.size()) 195 | { 196 | String_& line = codes_.emplace_back(codes.substr(pos)); 197 | 198 | const grapheme_iterator end = grapheme_iterator(line.begin(), line.end(), line.end()); 199 | std::vector>& line_grapheme = codes_grapheme_.emplace_back(); 200 | 201 | for (grapheme_iterator iter(line.begin(), line.end()); iter != end; ++iter) 202 | { 203 | 
line_grapheme.push_back(*iter); 204 | } 205 | } 206 | } 207 | template 208 | void make_from_code_(const Code_& code) 209 | { 210 | clear(); 211 | 212 | using grapheme_iterator = u5e::basic_grapheme_iterator; 213 | 214 | for (const auto& line_org : code.codes()) 215 | { 216 | String_& line = codes_.emplace_back(line_org); 217 | 218 | const grapheme_iterator end = grapheme_iterator(line.begin(), line.end(), line.end()); 219 | std::vector>& line_grapheme = codes_grapheme_.emplace_back(); 220 | 221 | for (grapheme_iterator iter(line.begin(), line.end()); iter != end; ++iter) 222 | { 223 | line_grapheme.push_back(*iter); 224 | } 225 | } 226 | } 227 | 228 | public: 229 | const std::vector& codes() const noexcept 230 | { 231 | return codes_; 232 | } 233 | const std::vector>>& codes_grapheme() const noexcept 234 | { 235 | return codes_grapheme_; 236 | } 237 | 238 | private: 239 | std::vector codes_; 240 | std::vector>> codes_grapheme_; 241 | }; 242 | 243 | using code = basic_code; 244 | using code_view = basic_code; 245 | } 246 | 247 | #endif -------------------------------------------------------------------------------- /include/Aheuiplusplus/command_line.hpp: -------------------------------------------------------------------------------- 1 | #ifndef AHUEIPLUSPLUS_HEADER_COMMAND_LINE_HPP 2 | #define AHUEIPLUSPLUS_HEADER_COMMAND_LINE_HPP 3 | 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | namespace app 11 | { 12 | class command_line final 13 | { 14 | public: 15 | command_line() = default; 16 | command_line(const command_line& data); 17 | command_line(command_line&& data) noexcept; 18 | ~command_line() = default; 19 | 20 | public: 21 | command_line& operator=(const command_line& data); 22 | command_line& operator=(command_line&& data) noexcept; 23 | bool operator==(const command_line& data) const = delete; 24 | bool operator!=(const command_line& data) const = delete; 25 | 26 | public: 27 | bool parse(int argc, char** argv); 28 | bool parse(std::ostream& 
error_stream, int argc, char** argv); 29 | 30 | public: 31 | bool option_aheui() const noexcept; 32 | void option_aheui(bool new_option_aheui) noexcept; 33 | bool option_interpreting_mode() const noexcept; 34 | void option_interpreting_mode(bool new_option_interpreting_mode) noexcept; 35 | bool option_debugging_mode() const noexcept; 36 | void option_debugging_mode(bool new_option_debugging_mode) noexcept; 37 | version option_version() const noexcept; 38 | void option_version(version new_option_version) noexcept; 39 | bool option_utf8() const noexcept; 40 | void option_utf8(bool new_option_utf8) noexcept; 41 | bool option_utf16() const noexcept; 42 | void option_utf16(bool new_option_utf16) noexcept; 43 | bool option_utf16be() const noexcept; 44 | void option_utf16be(bool new_option_utf16be) noexcept; 45 | 46 | bool option_loud_mode() const noexcept; 47 | void option_loud_mode(bool new_option_loud_mode) noexcept; 48 | 49 | std::string option_code_path() const; 50 | void option_code_path(const std::string_view& new_option_code_path); 51 | void option_code_path(std::string&& new_option_code_path); 52 | 53 | private: 54 | bool option_aheui_ = false; 55 | bool option_interpreting_mode_ = false; 56 | bool option_debugging_mode_ = false; 57 | version option_version_ = version::none; 58 | bool option_utf8_ = false; 59 | bool option_utf16_ = false; 60 | bool option_utf16be_ = false; 61 | 62 | bool option_loud_mode_ = false; 63 | 64 | std::string option_code_path_; 65 | }; 66 | } 67 | 68 | #endif -------------------------------------------------------------------------------- /include/Aheuiplusplus/cursor.hpp: -------------------------------------------------------------------------------- 1 | #ifndef AHEUIPLUSPLUS_HEADER_CURSOR_HPP 2 | #define AHEUIPLUSPLUS_HEADER_CURSOR_HPP 3 | 4 | #include 5 | 6 | namespace app 7 | { 8 | enum class direction 9 | { 10 | left, 11 | right, 12 | up, 13 | down, 14 | }; 15 | 16 | class point final 17 | { 18 | public: 19 | point() noexcept = 
default; 20 | point(std::size_t x, std::size_t y) noexcept; 21 | point(const app::point& point) noexcept; 22 | ~point() = default; 23 | 24 | public: 25 | point& operator=(const app::point& point) noexcept; 26 | bool operator==(const app::point& point) const noexcept; 27 | bool operator!=(const app::point& point) const noexcept; 28 | 29 | public: 30 | std::size_t x() const noexcept; 31 | std::size_t& x() noexcept; 32 | std::size_t y() const noexcept; 33 | std::size_t& y() noexcept; 34 | 35 | private: 36 | std::size_t x_ = 0; 37 | std::size_t y_ = 0; 38 | }; 39 | 40 | class cursor final 41 | { 42 | public: 43 | cursor() noexcept = default; 44 | cursor(const app::cursor& cursor) noexcept; 45 | ~cursor() = default; 46 | 47 | public: 48 | cursor& operator=(const app::cursor& cursor) noexcept; 49 | bool operator==(const app::cursor& cursor) const noexcept; 50 | bool operator!=(const app::cursor& cursor) const noexcept; 51 | 52 | public: 53 | std::size_t x() const noexcept; 54 | std::size_t& x() noexcept; 55 | std::size_t y() const noexcept; 56 | std::size_t& y() noexcept; 57 | app::direction direction() const noexcept; 58 | app::direction& direction() noexcept; 59 | std::size_t speed() const noexcept; 60 | std::size_t& speed() noexcept; 61 | 62 | private: 63 | app::point point_; 64 | app::direction direction_ = app::direction::down; 65 | std::size_t speed_ = 1; 66 | }; 67 | } 68 | 69 | #endif -------------------------------------------------------------------------------- /include/Aheuiplusplus/debugger.hpp: -------------------------------------------------------------------------------- 1 | #ifndef AHEUIPLUSPLUS_HEADER_DEBUGGER_HPP 2 | #define AHEUIPLUSPLUS_HEADER_DEBUGGER_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | 11 | namespace app 12 | { 13 | class debugger final 14 | { 15 | public: 16 | debugger() = default; 17 | debugger(const code_view& code, const command_line& command_line); 18 | debugger(code&& code, const command_line& 
command_line); 19 | debugger(const debugger& debugger) = delete; 20 | debugger(debugger&& debugger) noexcept = delete; 21 | ~debugger() = default; 22 | 23 | public: 24 | debugger& operator=(const debugger& debugger) = delete; 25 | debugger& operator=(debugger&& debugger) noexcept = delete; 26 | bool operator==(const debugger& debugger) const = delete; 27 | bool operator!=(const debugger& debugger) const = delete; 28 | 29 | public: 30 | const std::vector& breakpoints() const noexcept; 31 | std::vector& breakpoints() noexcept; 32 | 33 | private: 34 | std::vector breakpoints_; 35 | grapheme previous_command_; 36 | grapheme current_command_; 37 | 38 | interpreter interpreter_; 39 | }; 40 | } 41 | 42 | #endif -------------------------------------------------------------------------------- /include/Aheuiplusplus/element.hpp: -------------------------------------------------------------------------------- 1 | #ifndef AHEUIPLUSPLUS_HEADER_ELEMENT_HPP 2 | #define AHEUIPLUSPLUS_HEADER_ELEMENT_HPP 3 | 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | namespace app 12 | { 13 | enum class element_type 14 | { 15 | none, 16 | array = 0b0100000, 17 | reference = 0b1000000, 18 | 19 | number = 0b0000001, 20 | pointer = 0b0000010, 21 | instance = 0b0000100, 22 | function = 0b0001000, 23 | type = 0b0010000, 24 | 25 | array_of_number = number | array, 26 | array_of_pointer = pointer | array, 27 | array_of_instance = instance | array, 28 | array_of_function = function | array, 29 | array_of_type = type | array, 30 | 31 | reference_of_number = number | reference, 32 | reference_of_pointer = pointer | reference, 33 | reference_of_instance = instance | reference, 34 | reference_of_function = function | reference, 35 | reference_of_type = type | reference, 36 | 37 | reference_of_array_of_number = array_of_number | reference, 38 | reference_of_array_of_pointer = array_of_pointer | reference, 39 | reference_of_array_of_instance = array_of_instance | reference, 40 
| reference_of_array_of_function = array_of_function | reference, 41 | reference_of_array_of_type = array_of_type | reference, 42 | }; 43 | 44 | using element_element = 45 | std::variant, // number 46 | std::uintptr_t, // pointer 47 | // TODO: instance 48 | function_ptr, // function 49 | element_type // type 50 | >; 51 | using element_base = std::variant>; 52 | using element = std::variant; 53 | using element_ptr = std::shared_ptr; 54 | 55 | element_type get_element_type(const element_element& element) noexcept; 56 | element_type get_element_type(const element_base& element) noexcept; 57 | element_type get_element_type(const element& element) noexcept; 58 | 59 | const element_base& dereference(const element& element) noexcept; 60 | element_base& dereference(element& element) noexcept; 61 | } 62 | 63 | #endif -------------------------------------------------------------------------------- /include/Aheuiplusplus/extension.hpp: -------------------------------------------------------------------------------- 1 | #ifndef AHEUIPLUSPLUS_HEADER_EXTENSION_HPP 2 | #define AHEUIPLUSPLUS_HEADER_EXTENSION_HPP 3 | #ifdef AHEUIPLUSPLUS_USE_EXTENSION 4 | 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | namespace app 13 | { 14 | class extension 15 | { 16 | public: 17 | extension(const extension& extension) = delete; 18 | extension(extension&& extension) noexcept = delete; 19 | virtual ~extension() = default; 20 | 21 | protected: 22 | extension() = default; 23 | 24 | public: 25 | extension& operator=(const extension& extension) = delete; 26 | extension& operator=(extension&& extension) noexcept = delete; 27 | bool operator==(const extension& extension) const = delete; 28 | bool operator!=(const extension& extension) const = delete; 29 | 30 | public: 31 | virtual std::u32string name() const = 0; 32 | virtual std::u32string developer() const = 0; 33 | virtual std::u32string description() const; 34 | 35 | virtual element_ptr pop() = 0; 36 | virtual 
element_ptr push(const element_ptr& value) = 0; 37 | 38 | virtual void enabled(); 39 | virtual void disabled(); 40 | 41 | public: 42 | std::tuple target_version() const; 43 | }; 44 | 45 | class extension_engine final 46 | { 47 | public: 48 | extension_engine(const std::vector& extensions); 49 | extension_engine(const extension_engine& engine) = delete; 50 | extension_engine(extension_engine&& engine) noexcept = delete; 51 | ~extension_engine(); 52 | 53 | public: 54 | extension_engine& operator=(const extension_engine& engine) = delete; 55 | extension_engine& operator=(extension_engine&& engine) noexcept = delete; 56 | bool operator==(const extension_engine& engine) const = delete; 57 | bool operator!=(const extension_engine& engine) const = delete; 58 | 59 | private: 60 | std::map extensions_; 61 | }; 62 | } 63 | 64 | #endif 65 | #endif -------------------------------------------------------------------------------- /include/Aheuiplusplus/function.hpp: -------------------------------------------------------------------------------- 1 | #ifndef AHEUIPLUSPLUS_HEADER_FUNCTION_HPP 2 | #define AHEUIPLUSPLUS_HEADER_FUNCTION_HPP 3 | 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | namespace app 10 | { 11 | enum class function_type 12 | { 13 | named_function, 14 | unnamed_function, 15 | native_function, 16 | }; 17 | 18 | class storages; 19 | 20 | class function_info 21 | { 22 | public: 23 | function_info(const function_info& function) = delete; 24 | function_info(function_info&& function) noexcept = delete; 25 | virtual ~function_info() = default; 26 | 27 | protected: 28 | function_info() = default; 29 | 30 | public: 31 | function_info& operator=(const function_info& function) = delete; 32 | function_info& operator=(function_info&& function) noexcept = delete; 33 | bool operator==(const function_info& function) const = delete; 34 | bool operator!=(const function_info& function) const = delete; 35 | 36 | public: 37 | virtual code name() const = 0; 38 | virtual 
function_type type() const noexcept = 0; 39 | virtual void call(storages& storages) = 0; 40 | }; 41 | 42 | using function_ptr = std::shared_ptr; 43 | 44 | class named_function final : public function_info 45 | { 46 | public: 47 | named_function(const code_view& name, const code_view& code); 48 | named_function(code&& name, const code_view& code); 49 | named_function(const code_view& name, code&& code); 50 | named_function(code&& name, code&& code) noexcept; 51 | named_function(const named_function& function) = delete; 52 | named_function(named_function&& function) noexcept = delete; 53 | virtual ~named_function() override = default; 54 | 55 | public: 56 | named_function& operator=(const named_function& function) = delete; 57 | named_function& operator=(named_function&& function) noexcept = delete; 58 | bool operator==(const named_function& function) const = delete; 59 | bool operator!=(const named_function& function) const = delete; 60 | 61 | public: 62 | virtual code name() const override; 63 | virtual function_type type() const noexcept override; 64 | virtual void call(storages& storages) override; 65 | 66 | private: 67 | code name_; 68 | code code_; 69 | }; 70 | 71 | class unnamed_function final : public function_info 72 | { 73 | public: 74 | explicit unnamed_function(const code_view& code); 75 | explicit unnamed_function(code&& code) noexcept; 76 | unnamed_function(const unnamed_function& function) = delete; 77 | unnamed_function(unnamed_function&& function) noexcept = delete; 78 | virtual ~unnamed_function() override = default; 79 | 80 | public: 81 | unnamed_function& operator=(const unnamed_function& function) = delete; 82 | unnamed_function& operator=(unnamed_function&& function) noexcept = delete; 83 | bool operator==(const unnamed_function& function) const = delete; 84 | bool operator!=(const unnamed_function& function) const = delete; 85 | 86 | public: 87 | virtual code name() const override; 88 | virtual function_type type() const noexcept override; 89 | 
virtual void call(storages& storages) override; 90 | 91 | private: 92 | code code_; 93 | }; 94 | 95 | using native_function_object = std::function; 96 | 97 | class native_function final : public function_info 98 | { 99 | public: 100 | native_function(const code_view& name, const native_function_object& function); 101 | native_function(const code_view& name, native_function_object&& function); 102 | native_function(code&& name, const native_function_object& function); 103 | native_function(code&& name, native_function_object&& function); 104 | native_function(const native_function& function) = delete; 105 | native_function(native_function&& function) noexcept = delete; 106 | virtual ~native_function() override = default; 107 | 108 | public: 109 | native_function& operator=(const native_function& function) = delete; 110 | native_function& operator=(native_function&& function) noexcept = delete; 111 | bool operator==(const native_function& function) const = delete; 112 | bool operator!=(const native_function& function) const = delete; 113 | 114 | public: 115 | virtual code name() const override; 116 | virtual function_type type() const noexcept override; 117 | virtual void call(storages& storages) override; 118 | 119 | private: 120 | code name_; 121 | native_function_object function_; 122 | }; 123 | } 124 | 125 | #endif -------------------------------------------------------------------------------- /include/Aheuiplusplus/interpreter.hpp: -------------------------------------------------------------------------------- 1 | #ifndef AHEUIPLUSPLUS_HEADER_INTERPRETER_HPP 2 | #define AHEUIPLUSPLUS_HEADER_INTERPRETER_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | namespace app 16 | { 17 | #define AHEUIPLUSPLUS_VERSION_STRING "2.0.0" 18 | #define AHEUIPLUSPLUS_VERSION 2 19 | #define AHEUIPLUSPLUS_VERSION_MAJOR AHEUIPLUSPLUS_VERSION 20 | #define AHEUIPLUSPLUS_VERSION_MINOR 0 21 | 
#define AHEUIPLUSPLUS_VERSION_PATCH 0 22 | 23 | inline constexpr const char* version_string = AHEUIPLUSPLUS_VERSION_STRING; 24 | inline constexpr int version_major = AHEUIPLUSPLUS_VERSION_MAJOR; 25 | inline constexpr int version_minor = AHEUIPLUSPLUS_VERSION_MINOR; 26 | inline constexpr int version_patch = AHEUIPLUSPLUS_VERSION_PATCH; 27 | 28 | class debugger; 29 | class interpreter; 30 | 31 | class interpreter_state 32 | { 33 | friend class debugger; 34 | friend class interpreter; 35 | 36 | public: 37 | interpreter_state() noexcept; 38 | interpreter_state(const interpreter_state& state) noexcept; 39 | ~interpreter_state() = default; 40 | 41 | public: 42 | interpreter_state& operator=(const interpreter_state& state) noexcept; 43 | bool operator==(const interpreter_state& state) const = delete; 44 | bool operator!=(const interpreter_state& state) const = delete; 45 | 46 | public: 47 | void reset() noexcept; 48 | 49 | public: 50 | app::cursor cursor() const noexcept; 51 | bool is_out_of_version() const noexcept; 52 | app::mode mode() const noexcept; 53 | 54 | private: 55 | app::cursor cursor_; 56 | bool is_out_of_version_; 57 | app::mode mode_; 58 | }; 59 | 60 | class interpreter final 61 | { 62 | friend class debugger; 63 | 64 | public: 65 | interpreter(const command_line& command_line); 66 | interpreter(std::istream& input_stream, std::ostream& output_stream, const command_line& command_line); 67 | interpreter(const interpreter& interpreter) = delete; 68 | interpreter(interpreter&& interpreter) noexcept = delete; 69 | ~interpreter() = default; 70 | 71 | private: 72 | interpreter(debugger* debugger, const command_line& command_line); 73 | interpreter(debugger* debugger, std::istream& input_stream, std::ostream& output_stream, const command_line& command_line); 74 | 75 | public: 76 | interpreter& operator=(const interpreter& interpreter) = delete; 77 | interpreter& operator=(interpreter&& interpreter) noexcept = delete; 78 | bool operator==(const interpreter& 
interpreter) const = delete; 79 | bool operator!=(const interpreter& interpreter) const = delete; 80 | 81 | public: 82 | void reset_state() noexcept; 83 | void reset_storages(); 84 | void reset_namespaces(); 85 | 86 | namespace_ptr create_namespace(const code_view& name); 87 | namespace_ptr create_namespace(app::code&& name); 88 | void add_namespace(const namespace_ptr& namespace_info); 89 | void remove_namespace(const namespace_ptr& namespace_info); 90 | 91 | public: 92 | const app::code& code() const noexcept; 93 | void code(const app::code_view& new_code); 94 | void code(app::code&& new_code) noexcept; 95 | 96 | const std::vector& namespaces() const noexcept; 97 | std::vector& namespaces() noexcept; 98 | 99 | private: 100 | app::code code_; 101 | interpreter_state state_; 102 | storages storages_; 103 | version version_; 104 | 105 | debugger* const debugger_ = nullptr; 106 | 107 | std::istream& input_stream_; 108 | std::ostream& output_stream_; 109 | int input_stream_mode_; 110 | int output_stream_mode_; 111 | 112 | std::vector namespaces_; 113 | }; 114 | } 115 | 116 | #endif -------------------------------------------------------------------------------- /include/Aheuiplusplus/mode.hpp: -------------------------------------------------------------------------------- 1 | #ifndef AHEUIPLUSPLUS_HEADER_MODE_HPP 2 | #define AHEUIPLUSPLUS_HEADER_MODE_HPP 3 | 4 | #include 5 | 6 | namespace app 7 | { 8 | class mode final 9 | { 10 | public: 11 | mode() noexcept = default; 12 | mode(bool is_integer_mode, bool is_aheui_compatible_mode) noexcept; 13 | mode(app::version minimum_version, app::version maximum_version); 14 | mode(bool is_integer_mode, bool is_aheui_compatible_mode, 15 | app::version minimum_version, app::version maximum_version); 16 | mode(const app::mode& mode) noexcept; 17 | ~mode() = default; 18 | 19 | public: 20 | mode& operator=(const mode& mode) noexcept; 21 | bool operator==(const mode& mode) const noexcept; 22 | bool operator!=(const mode& mode) const 
noexcept; 23 | 24 | public: 25 | void reset() noexcept; 26 | 27 | public: 28 | bool is_integer_mode() const noexcept; 29 | void is_integer_mode(bool new_is_integer_mode) noexcept; 30 | bool is_aheui_compatible_mode() const noexcept; 31 | void is_aheui_compatible_mode(bool new_is_aheui_compatible_mode) noexcept; 32 | app::version minimum_version() const noexcept; 33 | void minimum_version(app::version new_minimum_version); 34 | app::version maximum_version() const noexcept; 35 | void maximum_version(app::version new_maximum_version); 36 | 37 | private: 38 | bool is_integer_mode_ = true; 39 | bool is_aheui_compatible_mode_ = true; 40 | app::version minimum_version_ = app::version::none; 41 | app::version maximum_version_ = app::version::latest; 42 | }; 43 | } 44 | 45 | #endif -------------------------------------------------------------------------------- /include/Aheuiplusplus/namespace.hpp: -------------------------------------------------------------------------------- 1 | #ifndef AHEUIPLUSPLUS_HEADER_NAMESPACE_HPP 2 | #define AHEUIPLUSPLUS_HEADER_NAMESPACE_HPP 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | 10 | namespace app 11 | { 12 | class namespace_info final 13 | { 14 | public: 15 | explicit namespace_info(const code_view& name); 16 | explicit namespace_info(code&& name); 17 | namespace_info(const namespace_info& namespace_) = delete; 18 | namespace_info(namespace_info&& namespace_) noexcept = delete; 19 | ~namespace_info() = default; 20 | 21 | public: 22 | namespace_info& operator=(const namespace_info& namespace_) = delete; 23 | namespace_info& operator=(namespace_info&& namespace_) noexcept = delete; 24 | bool operator==(const namespace_info& namespace_) const = delete; 25 | bool operator!=(const namespace_info& namespace_) const = delete; 26 | 27 | public: 28 | code_view name() const; 29 | 30 | const std::vector>& namespaces() const noexcept; 31 | std::vector>& namespaces() noexcept; 32 | const std::vector& functions() const noexcept; 33 
| std::vector& functions() noexcept; 34 | 35 | private: 36 | code name_; 37 | 38 | std::vector> namespaces_; 39 | std::vector functions_; 40 | }; 41 | 42 | using namespace_ptr = std::shared_ptr; 43 | } 44 | 45 | #endif -------------------------------------------------------------------------------- /include/Aheuiplusplus/storage.hpp: -------------------------------------------------------------------------------- 1 | #ifndef AHEUIPLUSPLUS_HEADER_STORAGE_HPP 2 | #define AHEUIPLUSPLUS_HEADER_STORAGE_HPP 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | namespace app 13 | { 14 | enum class storage_type 15 | { 16 | list, 17 | queue, 18 | #ifdef AHEUIPLUSPLUS_USE_EXTENSION 19 | pipe, 20 | #endif 21 | }; 22 | 23 | class storage 24 | { 25 | public: 26 | storage(const storage& storage) = delete; 27 | storage(storage&& storage) noexcept = delete; 28 | virtual ~storage() = default; 29 | 30 | protected: 31 | storage() = default; 32 | 33 | public: 34 | storage& operator=(const storage& storage) = delete; 35 | storage& operator=(storage&& storage) noexcept = delete; 36 | bool operator==(const storage& storage) const = delete; 37 | bool operator!=(const storage& storage) const = delete; 38 | 39 | public: 40 | virtual storage_type type() const noexcept = 0; 41 | 42 | virtual element_ptr pop() = 0; 43 | virtual element_ptr push(const element_ptr& value) = 0; 44 | 45 | virtual element_ptr copy() = 0; 46 | virtual void move(const element_ptr& value) = 0; 47 | virtual void swap() = 0; 48 | 49 | virtual std::size_t size() const noexcept = 0; 50 | 51 | virtual void clear() = 0; 52 | }; 53 | 54 | using storage_ptr = std::shared_ptr; 55 | 56 | class list final : public storage 57 | { 58 | public: 59 | list() = default; 60 | list(const list& list); 61 | list(list&& list) noexcept; 62 | virtual ~list() override = default; 63 | 64 | public: 65 | list& operator=(const list& list); 66 | list& operator=(list&& list) noexcept; 67 | bool 
operator==(const list& list) const = delete; 68 | bool operator!=(const list& list) const = delete; 69 | 70 | public: 71 | virtual storage_type type() const noexcept override; 72 | 73 | virtual element_ptr pop() override; 74 | virtual element_ptr push(const element_ptr& value) override; 75 | 76 | virtual element_ptr copy() override; 77 | virtual void move(const element_ptr& value) override; 78 | virtual void swap() override; 79 | 80 | virtual std::size_t size() const noexcept override; 81 | 82 | virtual void clear() override; 83 | 84 | public: 85 | std::size_t physical_size() const noexcept; 86 | 87 | private: 88 | std::vector list_; 89 | std::size_t virtual_length_ = 0; 90 | }; 91 | 92 | class queue final : public storage 93 | { 94 | public: 95 | queue() = default; 96 | queue(const queue& queue); 97 | queue(queue&& queue) noexcept; 98 | virtual ~queue() override = default; 99 | 100 | public: 101 | queue& operator=(const queue& queue); 102 | queue& operator=(queue&& queue) noexcept; 103 | bool operator==(const queue& queue) const = delete; 104 | bool operator!=(const queue& queue) const = delete; 105 | 106 | public: 107 | virtual storage_type type() const noexcept override; 108 | 109 | virtual element_ptr pop() override; 110 | virtual element_ptr push(const element_ptr& value) override; 111 | 112 | virtual element_ptr copy() override; 113 | virtual void move(const element_ptr& value) override; 114 | virtual void swap() override; 115 | 116 | virtual std::size_t size() const noexcept override; 117 | 118 | virtual void clear() override; 119 | 120 | private: 121 | std::deque deque_; 122 | }; 123 | 124 | #ifdef AHEUIPLUSPLUS_USE_EXTENSION 125 | class pipe final : public storage 126 | { 127 | public: 128 | pipe(extension* extension) noexcept; 129 | pipe(const pipe& pipe) noexcept; 130 | pipe(pipe&& pipe) noexcept; 131 | virtual ~pipe() override = default; 132 | 133 | public: 134 | pipe& operator=(const pipe& pipe) noexcept; 135 | pipe& operator=(pipe&& pipe) noexcept; 
136 | bool operator==(const pipe& pipe) const = delete; 137 | bool operator!=(const pipe& pipe) const = delete; 138 | 139 | public: 140 | virtual storage_type type() const noexcept override; 141 | 142 | virtual element_ptr pop() override; 143 | virtual element_ptr push(const element_ptr& value) override; 144 | 145 | virtual element_ptr copy() override; 146 | virtual void move(const element_ptr& value) override; 147 | virtual void swap() override; 148 | 149 | virtual std::size_t size() const noexcept override; 150 | 151 | virtual void clear() override; 152 | 153 | private: 154 | extension* extension_; 155 | element_ptr last_sent_value_ = nullptr; 156 | }; 157 | #endif 158 | 159 | class storages final 160 | { 161 | public: 162 | storages(); 163 | storages(const storages& storages); 164 | storages(storages&& storages) noexcept; 165 | ~storages() = default; 166 | 167 | public: 168 | storages& operator=(const storages& storages); 169 | storages& operator=(storages&& storages) noexcept; 170 | bool operator==(const storages& storages) const = delete; 171 | bool operator!=(const storages& storages) const = delete; 172 | 173 | public: 174 | storage_ptr get() const; 175 | void reset(); 176 | 177 | public: 178 | std::size_t storage_max_index() const noexcept; 179 | std::size_t storage_index() const noexcept; 180 | void storage_index(std::size_t new_storage_index); 181 | void selected_storage(std::size_t new_selected_storage); 182 | 183 | private: 184 | std::vector> storages_; 185 | std::vector storages_index_; 186 | std::size_t selected_storage_ = 0; 187 | }; 188 | } 189 | 190 | #endif -------------------------------------------------------------------------------- /include/Aheuiplusplus/version.hpp: -------------------------------------------------------------------------------- 1 | #ifndef AHEUIPLUSPLUS_HEADER_VERSION_HPP 2 | #define AHEUIPLUSPLUS_HEADER_VERSION_HPP 3 | 4 | namespace app 5 | { 6 | enum class version 7 | { 8 | none, 9 | 10 | v1_0, 11 | v1_1, 12 | 13 | v2_0, 
14 | 15 | latest_v1 = v1_1, 16 | latest_v2 = v2_0, 17 | latest = latest_v2, 18 | }; 19 | 20 | int get_major(app::version version) noexcept; 21 | int get_minor(app::version version) noexcept; 22 | version get_version(int major) noexcept; 23 | version get_version(int major, int minor) noexcept; 24 | 25 | bool operator>(app::version lhs, app::version rhs) noexcept; 26 | bool operator>=(app::version lhs, app::version rhs) noexcept; 27 | bool operator<(app::version lhs, app::version rhs) noexcept; 28 | bool operator<=(app::version lhs, app::version rhs) noexcept; 29 | } 30 | 31 | #endif -------------------------------------------------------------------------------- /src/cursor.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace app 4 | { 5 | point::point(std::size_t x, std::size_t y) noexcept 6 | : x_(x), y_(y) 7 | {} 8 | point::point(const app::point& point) noexcept 9 | : x_(point.x_), y_(point.y_) 10 | {} 11 | 12 | point& point::operator=(const app::point& point) noexcept 13 | { 14 | x_ = point.x_; 15 | y_ = point.y_; 16 | 17 | return *this; 18 | } 19 | bool point::operator==(const app::point& point) const noexcept 20 | { 21 | return x_ == point.x_ && y_ == point.y_; 22 | } 23 | bool point::operator!=(const app::point& point) const noexcept 24 | { 25 | return x_ != point.x_ || y_ != point.y_; 26 | } 27 | 28 | std::size_t point::x() const noexcept 29 | { 30 | return x_; 31 | } 32 | std::size_t& point::x() noexcept 33 | { 34 | return x_; 35 | } 36 | std::size_t point::y() const noexcept 37 | { 38 | return y_; 39 | } 40 | std::size_t& point::y() noexcept 41 | { 42 | return y_; 43 | } 44 | } 45 | 46 | namespace app 47 | { 48 | cursor::cursor(const app::cursor& cursor) noexcept 49 | : point_(cursor.point_), direction_(cursor.direction_), speed_(cursor.speed_) 50 | {} 51 | 52 | cursor& cursor::operator=(const app::cursor& cursor) noexcept 53 | { 54 | point_ = cursor.point_; 55 | direction_ = 
cursor.direction_; 56 | speed_ = cursor.speed_; 57 | 58 | return *this; 59 | } 60 | bool cursor::operator==(const app::cursor& cursor) const noexcept 61 | { 62 | return point_ == cursor.point_ && direction_ == cursor.direction_ && 63 | speed_ == cursor.speed_; 64 | } 65 | bool cursor::operator!=(const app::cursor& cursor) const noexcept 66 | { 67 | return point_ != cursor.point_ || direction_ != cursor.direction_ || 68 | speed_ != cursor.speed_; 69 | } 70 | 71 | std::size_t cursor::x() const noexcept 72 | { 73 | return point_.x(); 74 | } 75 | std::size_t& cursor::x() noexcept 76 | { 77 | return point_.x(); 78 | } 79 | std::size_t cursor::y() const noexcept 80 | { 81 | return point_.y(); 82 | } 83 | std::size_t& cursor::y() noexcept 84 | { 85 | return point_.y(); 86 | } 87 | direction cursor::direction() const noexcept 88 | { 89 | return direction_; 90 | } 91 | direction& cursor::direction() noexcept 92 | { 93 | return direction_; 94 | } 95 | std::size_t cursor::speed() const noexcept 96 | { 97 | return speed_; 98 | } 99 | std::size_t& cursor::speed() noexcept 100 | { 101 | return speed_; 102 | } 103 | } -------------------------------------------------------------------------------- /src/debugger.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | namespace app 6 | { 7 | debugger::debugger(const code_view& code, const command_line& command_line) 8 | : interpreter_(this, command_line) 9 | { 10 | interpreter_.code(code); 11 | } 12 | debugger::debugger(code&& code, const command_line& command_line) 13 | : interpreter_(this, command_line) 14 | { 15 | interpreter_.code(std::move(code)); 16 | } 17 | 18 | const std::vector& debugger::breakpoints() const noexcept 19 | { 20 | return breakpoints_; 21 | } 22 | std::vector& debugger::breakpoints() noexcept 23 | { 24 | return breakpoints_; 25 | } 26 | } -------------------------------------------------------------------------------- /src/element.cpp: 
-------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace app 4 | { 5 | element_type get_element_type(const element_element & element) noexcept 6 | { 7 | switch (element.index()) 8 | { 9 | case 0: 10 | return element_type::number; 11 | 12 | case 1: 13 | return element_type::pointer; 14 | 15 | case 2: 16 | return element_type::instance; 17 | 18 | case 3: 19 | return element_type::function; 20 | 21 | case 4: 22 | return element_type::type; 23 | 24 | default: 25 | return element_type::none; 26 | } 27 | } 28 | element_type get_element_type(const app::element_base& element) noexcept 29 | { 30 | if (element.index() == 0) 31 | { 32 | return get_element_type(std::get<0>(element)); 33 | } 34 | 35 | return static_cast( 36 | static_cast( 37 | get_element_type(std::get<1>(element)[0]) 38 | ) | static_cast(element_type::array) 39 | ); 40 | } 41 | element_type get_element_type(const app::element& element) noexcept 42 | { 43 | if (element.index() == 0) 44 | { 45 | return get_element_type(std::get<0>(element)); 46 | } 47 | 48 | return static_cast( 49 | static_cast( 50 | get_element_type(*std::get<1>(element)) 51 | ) | static_cast(element_type::reference) 52 | ); 53 | } 54 | 55 | const element_base& dereference(const element& element) noexcept 56 | { 57 | if (element.index() == 0) 58 | { 59 | return std::get<0>(element); 60 | } 61 | else 62 | { 63 | return *std::get<1>(element); 64 | } 65 | } 66 | element_base& dereference(element& element) noexcept 67 | { 68 | if (element.index() == 0) 69 | { 70 | return std::get<0>(element); 71 | } 72 | else 73 | { 74 | return *std::get<1>(element); 75 | } 76 | } 77 | } -------------------------------------------------------------------------------- /src/extension.cpp: -------------------------------------------------------------------------------- 1 | #ifdef AHEUIPLUSPLUS_USE_EXTENSION 2 | #include 3 | 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #if 
defined(WIN32) || defined(WIN64) || defined(_WIN32) || defined(_WIN64) 12 | # include 13 | #else 14 | # include 15 | #endif 16 | 17 | namespace app 18 | { 19 | std::u32string extension::description() const 20 | { 21 | return std::u32string(); 22 | } 23 | 24 | void extension::enabled() 25 | {} 26 | void extension::disabled() 27 | {} 28 | 29 | std::tuple extension::target_version() const 30 | { 31 | return { version_major, version_minor, version_patch }; 32 | } 33 | } 34 | 35 | namespace app 36 | { 37 | extension_engine::extension_engine(const std::vector& extensions) 38 | { 39 | for (const std::string& path : extensions) 40 | { 41 | using allocate_function = void*(); 42 | 43 | #if defined(WIN32) || defined(WIN64) || defined(_WIN32) || defined(_WIN64) 44 | HMODULE extension_module = LoadLibraryA(path.c_str()); 45 | if (!extension_module) 46 | throw std::runtime_error("인수 extensions 중 '" + path + "'를 여는데 실패했습니다."); 47 | 48 | const std::map::iterator iter = 49 | std::find_if(extensions_.begin(), extensions_.end(), [extension_module](const auto& data) 50 | { 51 | return data.first == extension_module; 52 | }); 53 | if (iter != extensions_.end()) 54 | throw std::runtime_error("인수 extensions 중 '" + path + "'가 두번 이상 등록되었습니다."); 55 | 56 | allocate_function* allocate = reinterpret_cast( 57 | GetProcAddress(extension_module, "allocate_extension")); 58 | if (!allocate) 59 | { 60 | FreeLibrary(extension_module); 61 | throw std::runtime_error("인수 extensions 중 '" + path + "'는 올바른 아희++ 표준 인터프리터 확장이 아닙니다."); 62 | } 63 | 64 | extension* extension = reinterpret_cast(allocate()); 65 | if (!extension) 66 | { 67 | FreeLibrary(extension_module); 68 | throw std::runtime_error("인수 extensions 중 '" + path + "'를 등록하는데 실패했습니다."); 69 | } 70 | 71 | extensions_[extension_module] = extension; 72 | extension->enabled(); 73 | #else 74 | void* extension_module = dlopen(path.c_str(), RTLD_LAZY); 75 | if (!extension_module) 76 | throw std::runtime_error("인수 extensions 중 '" + path + "'를 여는데 실패했습니다."); 
77 | 78 | const std::map::iterator iter = 79 | std::find_if(extensions_.begin(), extensions_.end(), [extension_module](const auto& data) 80 | { 81 | return data.first == extension_module; 82 | }); 83 | if (iter != extensions_.end()) 84 | throw std::runtime_error("인수 extensions 중 '" + path + "'가 두번 이상 등록되었습니다."); 85 | 86 | allocate_function* allocate = reinterpret_cast( 87 | dlsym(extension_module, "allocate_extension")); 88 | if (!allocate) 89 | { 90 | dlclose(extension_module); 91 | throw std::runtime_error("인수 extensions 중 '" + path + "'는 올바른 아희++ 표준 인터프리터 확장이 아닙니다."); 92 | } 93 | 94 | extension* extension = reinterpret_cast(allocate()); 95 | if (!extension) 96 | { 97 | dlclose(extension_module); 98 | throw std::runtime_error("인수 extensions 중 '" + path + "'를 등록하는데 실패했습니다."); 99 | } 100 | 101 | extensions_[extension_module] = extension; 102 | extension->enabled(); 103 | #endif 104 | } 105 | } 106 | extension_engine::~extension_engine() 107 | { 108 | for (const std::pair& extension : extensions_) 109 | { 110 | extension.second->disabled(); 111 | delete extension.second; 112 | 113 | #if defined(WIN32) || defined(WIN64) || defined(_WIN32) || defined(_WIN64) 114 | FreeLibrary(reinterpret_cast(extension.first)); 115 | #else 116 | dlclose(extension.first); 117 | #endif 118 | } 119 | } 120 | } 121 | 122 | #endif -------------------------------------------------------------------------------- /src/function.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | #include 6 | 7 | namespace app 8 | { 9 | named_function::named_function(const code_view& name, const code_view& code) 10 | : name_(name), code_(code) 11 | {} 12 | named_function::named_function(code&& name, const code_view& code) 13 | : name_(std::move(name)), code_(code) 14 | {} 15 | named_function::named_function(const code_view& name, code&& code) 16 | : name_(name), code_(std::move(code)) 17 | {} 18 | named_function::named_function(code&& name, 
code&& code) noexcept 19 | : name_(std::move(name)), code_(std::move(code)) 20 | {} 21 | 22 | code named_function::name() const 23 | { 24 | return name_; 25 | } 26 | function_type named_function::type() const noexcept 27 | { 28 | return function_type::named_function; 29 | } 30 | void named_function::call(storages& storages) 31 | { 32 | // TODO 33 | } 34 | } 35 | 36 | namespace app 37 | { 38 | unnamed_function::unnamed_function(const code_view& code) 39 | : code_(code) 40 | {} 41 | unnamed_function::unnamed_function(code&& code) noexcept 42 | : code_(std::move(code)) 43 | {} 44 | 45 | code unnamed_function::name() const 46 | { 47 | return code(); 48 | } 49 | function_type unnamed_function::type() const noexcept 50 | { 51 | return function_type::unnamed_function; 52 | } 53 | void unnamed_function::call(storages& storages) 54 | { 55 | // TODO 56 | } 57 | } 58 | 59 | namespace app 60 | { 61 | native_function::native_function(const code_view& name, const native_function_object& function) 62 | : name_(name), function_(function) 63 | {} 64 | native_function::native_function(const code_view& name, native_function_object&& function) 65 | : name_(name), function_(std::move(function)) 66 | {} 67 | native_function::native_function(code&& name, const native_function_object& function) 68 | : name_(std::move(name)), function_(function) 69 | {} 70 | native_function::native_function(code&& name, native_function_object&& function) 71 | : name_(std::move(name)), function_(std::move(function)) 72 | {} 73 | 74 | code native_function::name() const 75 | { 76 | return name_; 77 | } 78 | function_type native_function::type() const noexcept 79 | { 80 | return function_type::native_function; 81 | } 82 | void native_function::call(storages& storages) 83 | { 84 | function_(storages); 85 | } 86 | } -------------------------------------------------------------------------------- /src/interpreter.cpp: -------------------------------------------------------------------------------- 1 | #include 2 
| 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | namespace app 11 | { 12 | interpreter_state::interpreter_state() noexcept 13 | { 14 | reset(); 15 | } 16 | interpreter_state::interpreter_state(const interpreter_state& state) noexcept 17 | : cursor_(state.cursor_), is_out_of_version_(state.is_out_of_version_) 18 | {} 19 | 20 | interpreter_state& interpreter_state::operator=(const interpreter_state& state) noexcept 21 | { 22 | cursor_ = state.cursor_; 23 | is_out_of_version_ = state.is_out_of_version_; 24 | 25 | return *this; 26 | } 27 | 28 | void interpreter_state::reset() noexcept 29 | { 30 | cursor_.x() = 0; 31 | cursor_.y() = 0; 32 | cursor_.direction() = direction::down; 33 | cursor_.speed() = 1; 34 | 35 | is_out_of_version_ = false; 36 | 37 | mode_.reset(); 38 | } 39 | 40 | cursor interpreter_state::cursor() const noexcept 41 | { 42 | return cursor_; 43 | } 44 | bool interpreter_state::is_out_of_version() const noexcept 45 | { 46 | return is_out_of_version_; 47 | } 48 | app::mode interpreter_state::mode() const noexcept 49 | { 50 | return mode_; 51 | } 52 | } 53 | 54 | namespace app 55 | { 56 | interpreter::interpreter(const command_line& command_line) 57 | : interpreter(nullptr, std::cin, std::cout, command_line) 58 | {} 59 | interpreter::interpreter(std::istream& input_stream, std::ostream& output_stream, const command_line& command_line) 60 | : interpreter(nullptr, input_stream, output_stream, command_line) 61 | {} 62 | 63 | interpreter::interpreter(debugger* debugger, const command_line& command_line) 64 | : interpreter(debugger, std::cin, std::cout, command_line) 65 | {} 66 | interpreter::interpreter(debugger* debugger, std::istream& input_stream, std::ostream& output_stream, const command_line& command_line) 67 | : debugger_(debugger), input_stream_(input_stream), output_stream_(output_stream) 68 | { 69 | version_ = command_line.option_version(); 70 | 71 | if (version_ == version::none) 72 | throw std::invalid_argument("인수 
command_line이 올바르지 않은 값을 갖고 있습니다. (필드 option_version)"); 73 | 74 | reset_namespaces(); 75 | } 76 | 77 | void interpreter::reset_state() noexcept 78 | { 79 | state_.reset(); 80 | } 81 | void interpreter::reset_storages() 82 | { 83 | storages_.reset(); 84 | } 85 | void interpreter::reset_namespaces() 86 | { 87 | namespaces_.clear(); 88 | 89 | namespaces_.push_back( 90 | std::make_shared(code_view(U"")) 91 | ); 92 | } 93 | 94 | namespace_ptr interpreter::create_namespace(const code_view& name) 95 | { 96 | namespace_ptr result = std::make_shared(name); 97 | 98 | return namespaces_.push_back(result), result; 99 | } 100 | namespace_ptr interpreter::create_namespace(app::code&& name) 101 | { 102 | namespace_ptr result = std::make_shared(std::move(name)); 103 | 104 | return namespaces_.push_back(result), result; 105 | } 106 | void interpreter::add_namespace(const namespace_ptr& namespace_info) 107 | { 108 | namespaces_.push_back(namespace_info); 109 | } 110 | void interpreter::remove_namespace(const namespace_ptr& namespace_info) 111 | { 112 | std::vector::iterator iter = 113 | std::find(namespaces_.begin(), namespaces_.end(), namespace_info); 114 | 115 | if (iter != namespaces_.end()) 116 | { 117 | namespaces_.erase(iter); 118 | } 119 | 120 | throw std::invalid_argument("인수 namespace_info가 존재하지 않아 삭제할 수 없습니다."); 121 | } 122 | 123 | const app::code& interpreter::code() const noexcept 124 | { 125 | return code_; 126 | } 127 | void interpreter::code(const app::code_view& new_code) 128 | { 129 | code_ = new_code; 130 | reset_state(); 131 | } 132 | void interpreter::code(app::code&& new_code) noexcept 133 | { 134 | code_ = std::move(new_code); 135 | reset_state(); 136 | } 137 | 138 | const std::vector& interpreter::namespaces() const noexcept 139 | { 140 | return namespaces_; 141 | } 142 | std::vector& interpreter::namespaces() noexcept 143 | { 144 | return namespaces_; 145 | } 146 | } -------------------------------------------------------------------------------- 
/src/main.cpp: -------------------------------------------------------------------------------- 1 | #if AHEUIPLUSPLUS_TARGET == 1 2 | #include 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | int main(int argc, char** argv) 11 | { 12 | #ifdef AHEUIPLUSPLUS_PRINT_BENCHMARK 13 | std::clog << std::fixed << std::setprecision(10); 14 | 15 | const auto benchmark_timepoint_before_parsing_command_line = std::chrono::high_resolution_clock::now(); 16 | #endif 17 | 18 | app::command_line command_line; 19 | if (!command_line.parse(argc, argv)) 20 | { 21 | return EXIT_FAILURE; 22 | } 23 | 24 | #ifdef AHEUIPLUSPLUS_PRINT_BENCHMARK 25 | const auto benchmark_timepoint_after_parsing_command_line = std::chrono::high_resolution_clock::now(); 26 | const std::chrono::duration benchmark_duration_parsing_command_line = 27 | benchmark_timepoint_after_parsing_command_line - benchmark_timepoint_before_parsing_command_line; 28 | std::clog << "[Benchmark] Parsing command line: " << benchmark_duration_parsing_command_line.count() << "s(" << 29 | std::chrono::duration_cast>(benchmark_duration_parsing_command_line).count() << "ms, " << 30 | std::chrono::duration_cast>(benchmark_duration_parsing_command_line).count() << "µs)\n"; 31 | #endif 32 | 33 | return EXIT_SUCCESS; 34 | } 35 | 36 | #endif -------------------------------------------------------------------------------- /src/mode.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | namespace app 6 | { 7 | mode::mode(bool is_integer_mode, bool is_aheui_compatible_mode) noexcept 8 | : is_integer_mode_(is_integer_mode), is_aheui_compatible_mode_(is_aheui_compatible_mode) 9 | {} 10 | mode::mode(app::version minimum_version, app::version maximum_version) 11 | : minimum_version_(minimum_version), maximum_version_(maximum_version) 12 | { 13 | if (minimum_version > maximum_version) 14 | throw std::invalid_argument("인수 minimum_version은 인수 
maximum_version보다 낮은 버전이여야 합니다."); 15 | } 16 | mode::mode(bool is_integer_mode, bool is_aheui_compatible_mode, 17 | app::version minimum_version, app::version maximum_version) 18 | : is_integer_mode_(is_integer_mode), is_aheui_compatible_mode_(is_aheui_compatible_mode), 19 | minimum_version_(minimum_version), maximum_version_(maximum_version) 20 | { 21 | if (minimum_version > maximum_version) 22 | throw std::invalid_argument("인수 minimum_version은 인수 maximum_version보다 낮은 버전이여야 합니다."); 23 | } 24 | mode::mode(const app::mode& mode) noexcept 25 | : is_integer_mode_(mode.is_integer_mode_), is_aheui_compatible_mode_(mode.is_aheui_compatible_mode_), 26 | minimum_version_(mode.minimum_version_), maximum_version_(mode.maximum_version_) 27 | {} 28 | 29 | mode& mode::operator=(const mode& mode) noexcept 30 | { 31 | is_integer_mode_ = mode.is_integer_mode_; 32 | is_aheui_compatible_mode_ = mode.is_aheui_compatible_mode_; 33 | minimum_version_ = mode.minimum_version_; 34 | maximum_version_ = mode.maximum_version_; 35 | 36 | return *this; 37 | } 38 | bool mode::operator==(const mode& mode) const noexcept 39 | { 40 | return is_integer_mode_ == mode.is_integer_mode_ && 41 | is_aheui_compatible_mode_ == mode.is_aheui_compatible_mode_ && 42 | minimum_version_ == mode.minimum_version_ && 43 | maximum_version_ == mode.maximum_version_; 44 | } 45 | bool mode::operator!=(const mode& mode) const noexcept 46 | { 47 | return is_integer_mode_ != mode.is_integer_mode_ || 48 | is_aheui_compatible_mode_ != mode.is_aheui_compatible_mode_ || 49 | minimum_version_ != mode.minimum_version_ || 50 | maximum_version_ != mode.maximum_version_; 51 | } 52 | 53 | void mode::reset() noexcept 54 | { 55 | is_integer_mode_ = true; 56 | is_aheui_compatible_mode_ = true; 57 | minimum_version_ = version::none; 58 | maximum_version_ = version::latest; 59 | } 60 | 61 | bool mode::is_integer_mode() const noexcept 62 | { 63 | return is_integer_mode_; 64 | } 65 | void mode::is_integer_mode(bool new_is_integer_mode) 
noexcept 66 | { 67 | is_integer_mode_ = new_is_integer_mode; 68 | } 69 | bool mode::is_aheui_compatible_mode() const noexcept 70 | { 71 | return is_aheui_compatible_mode_; 72 | } 73 | void mode::is_aheui_compatible_mode(bool new_is_aheui_compatible_mode) noexcept 74 | { 75 | is_aheui_compatible_mode_ = new_is_aheui_compatible_mode; 76 | } 77 | app::version mode::minimum_version() const noexcept 78 | { 79 | return minimum_version_; 80 | } 81 | void mode::minimum_version(app::version new_minimum_version) 82 | { 83 | minimum_version_ = new_minimum_version; 84 | 85 | if (maximum_version_ < new_minimum_version) 86 | throw std::invalid_argument("인수 new_minimum_version은 필드 maximum_version보다 낮은 버전이여야 합니다."); 87 | } 88 | app::version mode::maximum_version() const noexcept 89 | { 90 | return maximum_version_; 91 | } 92 | void mode::maximum_version(app::version new_maximum_version) 93 | { 94 | maximum_version_ = new_maximum_version; 95 | 96 | if (minimum_version_ > new_maximum_version) 97 | throw std::invalid_argument("인수 new_maximum_version은 필드 minimum_version보다 높은 버전이여야 합니다."); 98 | } 99 | } -------------------------------------------------------------------------------- /src/namespace.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | namespace app 6 | { 7 | namespace_info::namespace_info(const code_view& name) 8 | : name_(name) 9 | {} 10 | namespace_info::namespace_info(code&& name) 11 | : name_(std::move(name)) 12 | {} 13 | 14 | code_view namespace_info::name() const 15 | { 16 | return name_; 17 | } 18 | 19 | const std::vector>& namespace_info::namespaces() const noexcept 20 | { 21 | return namespaces_; 22 | } 23 | std::vector>& namespace_info::namespaces() noexcept 24 | { 25 | return namespaces_; 26 | } 27 | const std::vector& namespace_info::functions() const noexcept 28 | { 29 | return functions_; 30 | } 31 | std::vector& namespace_info::functions() noexcept 32 | { 33 | return functions_; 34 | } 
35 | } -------------------------------------------------------------------------------- /src/storage.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace app 8 | { 9 | list::list(const list& list) 10 | : list_(list.list_), virtual_length_(list.virtual_length_) 11 | {} 12 | list::list(list&& list) noexcept 13 | : list_(std::move(list.list_)), virtual_length_(list.virtual_length_) 14 | { 15 | list.virtual_length_ = 0; 16 | } 17 | 18 | list& list::operator=(const list& list) 19 | { 20 | list_ = list.list_; 21 | virtual_length_ = list.virtual_length_; 22 | 23 | return *this; 24 | } 25 | list& list::operator=(list&& list) noexcept 26 | { 27 | list_ = std::move(list.list_); 28 | virtual_length_ = list.virtual_length_; 29 | 30 | list.virtual_length_ = 0; 31 | 32 | return *this; 33 | } 34 | 35 | storage_type list::type() const noexcept 36 | { 37 | return storage_type::list; 38 | } 39 | 40 | element_ptr list::pop() 41 | { 42 | if (virtual_length_ == 0) 43 | return nullptr; 44 | 45 | std::vector::iterator end = list_.begin() + virtual_length_; 46 | 47 | element_ptr result = *end; 48 | list_.erase(end); 49 | virtual_length_ -= 1; 50 | 51 | return result; 52 | } 53 | element_ptr list::push(const element_ptr& value) 54 | { 55 | return list_.insert(list_.begin() + virtual_length_++, value), value; 56 | } 57 | 58 | element_ptr list::copy() 59 | { 60 | if (virtual_length_ == 0) 61 | return nullptr; 62 | 63 | std::vector::iterator end = list_.begin() + virtual_length_; 64 | element_ptr result = element_ptr(new element(**(end - 1))); 65 | 66 | return push(result), result; 67 | } 68 | void list::move(const element_ptr& value) 69 | { 70 | push(value); 71 | } 72 | void list::swap() 73 | { 74 | if (virtual_length_ < 2) 75 | return; 76 | 77 | std::vector::iterator last_iter = list_.begin() + virtual_length_ - 1; 78 | std::iter_swap(last_iter, last_iter - 1); 79 | } 80 | 81 | 
std::size_t list::size() const noexcept 82 | { 83 | return virtual_length_; 84 | } 85 | 86 | void list::clear() 87 | { 88 | list_.clear(); 89 | } 90 | 91 | std::size_t list::physical_size() const noexcept 92 | { 93 | return list_.size(); 94 | } 95 | } 96 | 97 | namespace app 98 | { 99 | queue::queue(const queue& queue) 100 | : deque_(queue.deque_) 101 | {} 102 | queue::queue(queue&& queue) noexcept 103 | : deque_(std::move(queue.deque_)) 104 | {} 105 | 106 | queue& queue::operator=(const queue& queue) 107 | { 108 | deque_ = queue.deque_; 109 | 110 | return *this; 111 | } 112 | queue& queue::operator=(queue&& queue) noexcept 113 | { 114 | deque_ = std::move(queue.deque_); 115 | 116 | return *this; 117 | } 118 | 119 | storage_type queue::type() const noexcept 120 | { 121 | return storage_type::queue; 122 | } 123 | 124 | element_ptr queue::pop() 125 | { 126 | if (deque_.size() == 0) 127 | return nullptr; 128 | 129 | element_ptr result = deque_.front(); 130 | deque_.pop_front(); 131 | 132 | return result; 133 | } 134 | element_ptr queue::push(const element_ptr& value) 135 | { 136 | return deque_.push_back(value), value; 137 | } 138 | 139 | element_ptr queue::copy() 140 | { 141 | element_ptr result = element_ptr(new element(*deque_.front())); 142 | 143 | return deque_.push_front(result), result; 144 | } 145 | void queue::move(const element_ptr& value) 146 | { 147 | push(value); 148 | } 149 | void queue::swap() 150 | { 151 | if (deque_.size() < 2) 152 | return; 153 | 154 | std::iter_swap(deque_.begin(), deque_.begin() + 1); 155 | } 156 | 157 | std::size_t queue::size() const noexcept 158 | { 159 | return deque_.size(); 160 | } 161 | 162 | void queue::clear() 163 | { 164 | deque_.clear(); 165 | } 166 | } 167 | 168 | #ifdef AHEUIPLUSPLUS_USE_EXTENSION 169 | namespace app 170 | { 171 | pipe::pipe(extension* extension) noexcept 172 | : extension_(extension) 173 | {} 174 | pipe::pipe(const pipe& pipe) noexcept 175 | : extension_(pipe.extension_) 176 | {} 177 | 
pipe::pipe(pipe&& pipe) noexcept 178 | : extension_(pipe.extension_) 179 | { 180 | pipe.extension_ = nullptr; 181 | } 182 | 183 | pipe& pipe::operator=(const pipe& pipe) noexcept 184 | { 185 | extension_ = pipe.extension_; 186 | 187 | return *this; 188 | } 189 | pipe& pipe::operator=(pipe&& pipe) noexcept 190 | { 191 | extension_ = pipe.extension_; 192 | pipe.extension_ = nullptr; 193 | 194 | return *this; 195 | } 196 | 197 | storage_type pipe::type() const noexcept 198 | { 199 | return storage_type::pipe; 200 | } 201 | 202 | element_ptr pipe::pop() 203 | { 204 | if (!extension_) 205 | return nullptr; 206 | 207 | return extension_->pop(); 208 | } 209 | element_ptr pipe::push(const element_ptr& value) 210 | { 211 | if (!extension_) 212 | return nullptr; 213 | 214 | return extension_->push(value); 215 | } 216 | 217 | element_ptr pipe::copy() 218 | { 219 | if (extension_ && last_sent_value_) 220 | { 221 | return extension_->push(last_sent_value_); 222 | } 223 | else 224 | { 225 | return nullptr; 226 | } 227 | } 228 | void pipe::move(const element_ptr& value) 229 | { 230 | push(value); 231 | } 232 | void pipe::swap() 233 | {} 234 | 235 | std::size_t pipe::size() const noexcept 236 | { 237 | return static_cast(-1); 238 | } 239 | 240 | void pipe::clear() 241 | {} 242 | } 243 | #endif 244 | 245 | namespace app 246 | { 247 | storages::storages() 248 | { 249 | static constexpr std::size_t list_count = 250 | #ifdef AHEUIPLUSPLUS_USE_EXTENSION 251 | 26 252 | #else 253 | 27 254 | #endif 255 | ; 256 | 257 | for (std::size_t i = 0; i < list_count; ++i) // List 258 | { 259 | std::vector lists; 260 | lists.emplace_back(new list()); 261 | 262 | storages_.push_back(lists); 263 | } 264 | 265 | std::vector queues; // Queue 266 | queues.emplace_back(new queue()); 267 | 268 | storages_.insert(storages_.begin() + 21, queues); 269 | 270 | #ifdef AHEUIPLUSPLUS_USE_EXTENSION 271 | storages_.emplace_back(); // Pipe 272 | 273 | for (std::size_t i = 0; i < 28; ++i) 274 | { 275 | 
storages_index_.push_back(0); 276 | } 277 | #endif 278 | } 279 | storages::storages(const storages& storages) 280 | : storages_(storages.storages_), storages_index_(storages.storages_index_), selected_storage_(storages.selected_storage_) 281 | {} 282 | storages::storages(storages&& storages) noexcept 283 | : storages_(std::move(storages.storages_)), storages_index_(std::move(storages.storages_index_)), 284 | selected_storage_(storages.selected_storage_) 285 | { 286 | storages.selected_storage_ = 0; 287 | } 288 | 289 | storages& storages::operator=(const storages& storages) 290 | { 291 | storages_ = storages.storages_; 292 | storages_index_ = storages.storages_index_; 293 | selected_storage_ = storages.selected_storage_; 294 | 295 | return *this; 296 | } 297 | storages& storages::operator=(storages&& storages) noexcept 298 | { 299 | storages_ = std::move(storages.storages_); 300 | storages_index_ = std::move(storages.storages_index_); 301 | selected_storage_ = storages.selected_storage_; 302 | 303 | storages.selected_storage_ = 0; 304 | 305 | return *this; 306 | } 307 | 308 | storage_ptr storages::get() const 309 | { 310 | return storages_[selected_storage_][storages_index_[selected_storage_]]; 311 | } 312 | void storages::reset() 313 | { 314 | for (std::vector& storages : storages_) 315 | { 316 | storage_ptr first = storages.front(); 317 | 318 | storages.clear(); 319 | 320 | first->clear(); 321 | storages.push_back(std::move(first)); 322 | } 323 | } 324 | 325 | std::size_t storages::storage_max_index() const noexcept 326 | { 327 | return storages_[selected_storage_].size() - 1; 328 | } 329 | std::size_t storages::storage_index() const noexcept 330 | { 331 | return storages_index_[selected_storage_]; 332 | } 333 | void storages::storage_index(std::size_t new_storage_index) 334 | { 335 | if (new_storage_index > storages_[selected_storage_].size()) 336 | throw std::out_of_range("인수 new_storage_index는 함숫값 app::storages::storage_max_index() const noexcept보다 1 큰 값 이하여야 
합니다."); 337 | 338 | if (new_storage_index == storages_[selected_storage_].size()) 339 | { 340 | switch (storages_[selected_storage_][0]->type()) 341 | { 342 | case storage_type::list: 343 | storages_[selected_storage_].emplace_back(new list()); 344 | break; 345 | 346 | case storage_type::queue: 347 | storages_[selected_storage_].emplace_back(new queue()); 348 | break; 349 | 350 | #ifdef AHEUIPLUSPLUS_USE_EXTENSION 351 | case storage_type::pipe: 352 | storages_[selected_storage_].emplace_back(get()); 353 | break; 354 | #endif 355 | } 356 | } 357 | 358 | storages_index_[selected_storage_] = new_storage_index; 359 | } 360 | void storages::selected_storage(std::size_t new_selected_storage) 361 | { 362 | if (new_selected_storage >= 28) 363 | throw std::out_of_range("인수 new_selected_storage는 28 미만이여야 합니다."); 364 | 365 | selected_storage_ = new_selected_storage; 366 | } 367 | } -------------------------------------------------------------------------------- /src/version_.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace app 4 | { 5 | int get_major(app::version version) noexcept 6 | { 7 | switch (version) 8 | { 9 | case app::version::v1_0: 10 | case app::version::v1_1: 11 | return 1; 12 | 13 | case app::version::v2_0: 14 | return 2; 15 | 16 | default: 17 | return -1; 18 | } 19 | } 20 | int get_minor(app::version version) noexcept 21 | { 22 | switch (version) 23 | { 24 | case app::version::v1_0: 25 | case app::version::v2_0: 26 | return 0; 27 | 28 | case app::version::v1_1: 29 | return 1; 30 | 31 | default: 32 | return -1; 33 | } 34 | } 35 | version get_version(int major) noexcept 36 | { 37 | switch (major) 38 | { 39 | case 1: 40 | return version::latest_v1; 41 | 42 | case 2: 43 | return version::latest_v2; 44 | 45 | default: 46 | return version::none; 47 | } 48 | } 49 | version get_version(int major, int minor) noexcept 50 | { 51 | switch (major) 52 | { 53 | case 1: 54 | switch (minor) 55 | { 56 | case 0: 
57 | return version::v1_0; 58 | 59 | case 1: 60 | return version::v1_1; 61 | 62 | default: 63 | return version::none; 64 | } 65 | 66 | case 2: 67 | switch (minor) 68 | { 69 | case 0: 70 | return version::v2_0; 71 | 72 | default: 73 | return version::none; 74 | } 75 | 76 | default: 77 | return version::none; 78 | } 79 | } 80 | 81 | bool operator>(app::version lhs, app::version rhs) noexcept 82 | { 83 | return static_cast(lhs) > static_cast(rhs); 84 | } 85 | bool operator>=(app::version lhs, app::version rhs) noexcept 86 | { 87 | return static_cast(lhs) >= static_cast(rhs); 88 | } 89 | bool operator<(app::version lhs, app::version rhs) noexcept 90 | { 91 | return static_cast(lhs) < static_cast(rhs); 92 | } 93 | bool operator<=(app::version lhs, app::version rhs) noexcept 94 | { 95 | return static_cast(lhs) <= static_cast(rhs); 96 | } 97 | } --------------------------------------------------------------------------------