├── .github ├── dependabot.yml └── workflows │ ├── ci-version.yml │ └── ci.yml ├── .gitignore ├── Cargo.toml ├── LICENSE ├── README.md ├── build.rs ├── opencc ├── HKVariants.ocd2 ├── HKVariantsRev.ocd2 ├── HKVariantsRevPhrases.ocd2 ├── JPShinjitaiCharacters.ocd2 ├── JPShinjitaiPhrases.ocd2 ├── JPVariants.ocd2 ├── JPVariantsRev.ocd2 ├── STCharacters.ocd2 ├── STPhrases.ocd2 ├── TSCharacters.ocd2 ├── TSPhrases.ocd2 ├── TWPhrases.ocd2 ├── TWPhrasesRev.ocd2 ├── TWVariants.ocd2 ├── TWVariantsRev.ocd2 ├── TWVariantsRevPhrases.ocd2 ├── hk2s.json ├── hk2t.json ├── jp2t.json ├── s2hk.json ├── s2t.json ├── s2tw.json ├── s2twp.json ├── t2hk.json ├── t2jp.json ├── t2s.json ├── t2tw.json ├── tw2s.json ├── tw2sp.json └── tw2t.json ├── rustfmt.toml ├── src └── lib.rs └── tests ├── opencc.rs └── static_dictionaries.rs /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: github-actions 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" -------------------------------------------------------------------------------- /.github/workflows/ci-version.yml: -------------------------------------------------------------------------------- 1 | name: CI-version 2 | 3 | on: 4 | push: 5 | tags: 6 | - "v*" 7 | 8 | env: 9 | CARGO_TERM_COLOR: always 10 | 11 | jobs: 12 | tests: 13 | strategy: 14 | fail-fast: false 15 | matrix: 16 | os: 17 | - ubuntu-latest 18 | toolchain: 19 | - stable 20 | - nightly 21 | features: 22 | - 23 | - --features static-dictionaries 24 | name: Test ${{ matrix.toolchain }} on ${{ matrix.os }} (${{ matrix.features }}) 25 | runs-on: ${{ matrix.os }} 26 | steps: 27 | - name: Install OpenCC 28 | run: | 29 | sudo apt update 30 | sudo apt install doxygen cmake wget 31 | wget https://github.com/BYVoid/OpenCC/archive/refs/tags/ver.1.1.9.tar.gz 32 | tar xf ver.1.1.9.tar.gz 33 | cd OpenCC-* 34 | make -j$(nproc) PREFIX=/usr 35 | sudo make PREFIX=/usr install 36 | - run: sudo 
ldconfig 37 | - uses: actions/checkout@v4 38 | - uses: actions-rust-lang/setup-rust-toolchain@v1 39 | with: 40 | toolchain: ${{ matrix.toolchain }} 41 | - run: cargo test --release ${{ matrix.features }} 42 | - run: cargo doc --release ${{ matrix.features }} 43 | 44 | MSRV: 45 | strategy: 46 | fail-fast: false 47 | matrix: 48 | os: 49 | - ubuntu-latest 50 | toolchain: 51 | - "1.61" 52 | features: 53 | - 54 | - --features static-dictionaries 55 | name: Test ${{ matrix.toolchain }} on ${{ matrix.os }} (${{ matrix.features }}) 56 | runs-on: ${{ matrix.os }} 57 | steps: 58 | - name: Install OpenCC 59 | run: | 60 | sudo apt update 61 | sudo apt install doxygen cmake wget 62 | wget https://github.com/BYVoid/OpenCC/archive/refs/tags/ver.1.1.9.tar.gz 63 | tar xf ver.1.1.9.tar.gz 64 | cd OpenCC-* 65 | make -j$(nproc) PREFIX=/usr 66 | sudo make PREFIX=/usr install 67 | - run: sudo ldconfig 68 | - uses: actions/checkout@v4 69 | - uses: actions-rust-lang/setup-rust-toolchain@v1 70 | with: 71 | toolchain: ${{ matrix.toolchain }} 72 | - run: cargo test --release --lib --bins ${{ matrix.features }} -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: [ push, pull_request ] 4 | 5 | env: 6 | CARGO_TERM_COLOR: always 7 | 8 | jobs: 9 | rustfmt: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v4 13 | - uses: actions-rust-lang/setup-rust-toolchain@v1 14 | with: 15 | toolchain: nightly 16 | components: rustfmt 17 | - uses: actions-rust-lang/rustfmt@v1 18 | 19 | clippy: 20 | runs-on: ubuntu-latest 21 | steps: 22 | - name: Install OpenCC 23 | run: | 24 | sudo apt update 25 | sudo apt install doxygen cmake wget 26 | wget https://github.com/BYVoid/OpenCC/archive/refs/tags/ver.1.1.9.tar.gz 27 | tar xf ver.1.1.9.tar.gz 28 | cd OpenCC-* 29 | make -j$(nproc) PREFIX=/usr 30 | sudo make PREFIX=/usr install 31 | - 
uses: actions/checkout@v4 32 | - uses: actions-rust-lang/setup-rust-toolchain@v1 33 | with: 34 | components: clippy 35 | - run: cargo clippy --all-targets --all-features -- -D warnings 36 | 37 | tests: 38 | strategy: 39 | fail-fast: false 40 | matrix: 41 | os: 42 | - ubuntu-latest 43 | toolchain: 44 | - stable 45 | - nightly 46 | features: 47 | - 48 | - --features static-dictionaries 49 | name: Test ${{ matrix.toolchain }} on ${{ matrix.os }} (${{ matrix.features }}) 50 | runs-on: ${{ matrix.os }} 51 | steps: 52 | - name: Install OpenCC 53 | run: | 54 | sudo apt update 55 | sudo apt install doxygen cmake wget 56 | wget https://github.com/BYVoid/OpenCC/archive/refs/tags/ver.1.1.9.tar.gz 57 | tar xf ver.1.1.9.tar.gz 58 | cd OpenCC-* 59 | make -j$(nproc) PREFIX=/usr 60 | sudo make PREFIX=/usr install 61 | - run: sudo ldconfig 62 | - uses: actions/checkout@v4 63 | - uses: actions-rust-lang/setup-rust-toolchain@v1 64 | with: 65 | toolchain: ${{ matrix.toolchain }} 66 | - run: cargo test ${{ matrix.features }} 67 | - run: cargo doc ${{ matrix.features }} 68 | 69 | MSRV: 70 | strategy: 71 | fail-fast: false 72 | matrix: 73 | os: 74 | - ubuntu-latest 75 | toolchain: 76 | - "1.61" 77 | features: 78 | - 79 | - --features static-dictionaries 80 | name: Test ${{ matrix.toolchain }} on ${{ matrix.os }} (${{ matrix.features }}) 81 | runs-on: ${{ matrix.os }} 82 | steps: 83 | - name: Install OpenCC 84 | run: | 85 | sudo apt update 86 | sudo apt install doxygen cmake wget 87 | wget https://github.com/BYVoid/OpenCC/archive/refs/tags/ver.1.1.9.tar.gz 88 | tar xf ver.1.1.9.tar.gz 89 | cd OpenCC-* 90 | make -j$(nproc) PREFIX=/usr 91 | sudo make PREFIX=/usr install 92 | - run: sudo ldconfig 93 | - uses: actions/checkout@v4 94 | - uses: actions-rust-lang/setup-rust-toolchain@v1 95 | with: 96 | toolchain: ${{ matrix.toolchain }} 97 | - run: cargo test --lib --bins ${{ matrix.features }} -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | **/*_output/ 2 | 3 | # Created by https://www.gitignore.io/api/intellij+all 4 | 5 | ### Intellij+all ### 6 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm 7 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 8 | 9 | # User-specific stuff 10 | .idea/**/workspace.xml 11 | .idea/**/tasks.xml 12 | .idea/**/usage.statistics.xml 13 | .idea/**/dictionaries 14 | .idea/**/shelf 15 | 16 | # Sensitive or high-churn files 17 | .idea/**/dataSources/ 18 | .idea/**/dataSources.ids 19 | .idea/**/dataSources.local.xml 20 | .idea/**/sqlDataSources.xml 21 | .idea/**/dynamic.xml 22 | .idea/**/uiDesigner.xml 23 | .idea/**/dbnavigator.xml 24 | 25 | # Gradle 26 | .idea/**/gradle.xml 27 | .idea/**/libraries 28 | 29 | # Gradle and Maven with auto-import 30 | # When using Gradle or Maven with auto-import, you should exclude module files, 31 | # since they will be recreated, and may cause churn. Uncomment if using 32 | # auto-import. 
33 | # .idea/modules.xml 34 | # .idea/*.iml 35 | # .idea/modules 36 | 37 | # CMake 38 | cmake-build-*/ 39 | 40 | # Mongo Explorer plugin 41 | .idea/**/mongoSettings.xml 42 | 43 | # File-based project format 44 | *.iws 45 | 46 | # IntelliJ 47 | out/ 48 | 49 | # mpeltonen/sbt-idea plugin 50 | .idea_modules/ 51 | 52 | # JIRA plugin 53 | atlassian-ide-plugin.xml 54 | 55 | # Cursive Clojure plugin 56 | .idea/replstate.xml 57 | 58 | # Crashlytics plugin (for Android Studio and IntelliJ) 59 | com_crashlytics_export_strings.xml 60 | crashlytics.properties 61 | crashlytics-build.properties 62 | fabric.properties 63 | 64 | # Editor-based Rest Client 65 | .idea/httpRequests 66 | 67 | ### Intellij+all Patch ### 68 | # Ignores the whole .idea folder and all .iml files 69 | # See https://github.com/joeblau/gitignore.io/issues/186 and https://github.com/joeblau/gitignore.io/issues/360 70 | 71 | .idea/ 72 | 73 | # Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-249601023 74 | 75 | *.iml 76 | modules.xml 77 | .idea/misc.xml 78 | *.ipr 79 | 80 | 81 | # End of https://www.gitignore.io/api/intellij+all 82 | 83 | 84 | ### Rust ### 85 | # Generated by Cargo 86 | # will have compiled files and executables 87 | /target/ 88 | 89 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 90 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html 91 | Cargo.lock 92 | 93 | # These are backup files generated by rustfmt 94 | **/*.rs.bk 95 | 96 | 97 | # End of https://www.gitignore.io/api/rust 98 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "opencc-rust" 3 | version = "1.1.19" 4 | authors = ["Magic Len "] 5 | edition = "2021" 6 | rust-version = "1.61" 7 | repository = "https://github.com/magiclen/opencc-rust" 8 | homepage = 
"https://magiclen.org/opencc-rust" 9 | keywords = ["chinese", "opencc", "traditional", "simple"] 10 | categories = ["external-ffi-bindings", "localization"] 11 | description = "Open Chinese Convert(OpenCC, 開放中文轉換) binding for the Rust language for conversion between Traditional Chinese and Simplified Chinese." 12 | license = "Apache-2.0" 13 | include = ["src/**/*", "Cargo.toml", "README.md", "LICENSE", "build.rs", "opencc/**/*"] 14 | 15 | [dependencies] 16 | libc = "0.2.43" 17 | lazy-static-include = { version = "3.1.1", optional = true } 18 | lazy_static = { version = "1.4", optional = true } 19 | 20 | [build-dependencies] 21 | pkg-config = "0.3.14" 22 | 23 | [features] 24 | static-dictionaries = ["lazy-static-include", "lazy_static"] 25 | 26 | [package.metadata.docs.rs] 27 | all-features = true 28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2015-2018 magiclen.org 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | OpenCC Rust 2 | ==================== 3 | 4 | [![CI](https://github.com/magiclen/opencc-rust/actions/workflows/ci.yml/badge.svg)](https://github.com/magiclen/opencc-rust/actions/workflows/ci.yml) 5 | 6 | Open Chinese Convert(OpenCC, 開放中文轉換) binding for the Rust language for conversion between Traditional Chinese and Simplified Chinese. 7 | 8 | ## Compilation 9 | 10 | To compile this crate, you need to compile the OpenCC C++ library first. You can install OpenCC system-wide, or somewhere else in your file system. In the latter case, you need to set the following environment variables to link the OpenCC library: 11 | 12 | * `OPENCC_LIB_DIRS`: The directories of library files, like `-L`. Use `:` to separate (`;` on Windows). 13 | * `OPENCC_LIBS`: The library names that you want to link, like `-l`. Use `:` to separate. Typically, it contains **opencc:marisa**. 14 | * `OPENCC_INCLUDE_DIRS`: The directories of header files, like `-I`. Use `:` to separate (`;` on Windows). 15 | * `OPENCC_STATIC`: Whether to link with `static` or `dylib`. Set it to `0` for `dylib`; any other value selects `static`. If it is unset, the mode is detected from the files found in the library directories. 16 | * `OPENCC_DYLIB_STDCPP`: If you use `static` linking, and your OpenCC library is built against the GNU C library (glibc), this environment variable should be set (to any value other than `0`) so that `stdc++` is linked dynamically. 17 | * `OPENCC_STATIC_STDCPP`: If you use `static` linking, and your OpenCC library is built against musl libc, this environment variable should be set (to any value other than `0`) so that `stdc++` is linked statically. 
18 | 19 | ## Examples 20 | 21 | ```rust 22 | use opencc_rust::*; 23 | 24 | let opencc = OpenCC::new(DefaultConfig::TW2SP).unwrap(); 25 | 26 | let s = opencc.convert("涼風有訊"); 27 | 28 | assert_eq!("凉风有讯", &s); 29 | 30 | let s = opencc.convert_to_buffer(",秋月無邊", s); 31 | 32 | assert_eq!("凉风有讯,秋月无边", &s); 33 | ``` 34 | 35 | ```rust 36 | use opencc_rust::*; 37 | 38 | let opencc = OpenCC::new(DefaultConfig::S2TWP).unwrap(); 39 | 40 | let s = opencc.convert("凉风有讯"); 41 | 42 | assert_eq!("涼風有訊", &s); 43 | 44 | let s = opencc.convert_to_buffer(",秋月无边", s); 45 | 46 | assert_eq!("涼風有訊,秋月無邊", &s); 47 | ``` 48 | 49 | ## Static Dictionaries 50 | 51 | Usually, OpenCC needs to be run in an environment where OpenCC is installed. If you want to make it portable, you can enable the `static-dictionaries` feature. 52 | 53 | ```toml 54 | [dependencies.opencc-rust] 55 | version = "*" 56 | features = ["static-dictionaries"] 57 | ``` 58 | Then, the `generate_static_dictionary` and `generate_static_dictionaries` functions are available. 59 | 60 | The default OpenCC dictionaries will be compiled into the binary file by the `lazy_static_include` crate, and you can use these two functions to recover them on demand. 61 | 62 | For example, 63 | 64 | ```rust 65 | use opencc_rust::*; 66 | 67 | let output_path = "/path/to/dictionaries-directory"; 68 | 69 | generate_static_dictionary(&output_path, DefaultConfig::TW2SP).unwrap(); 70 | 71 | let opencc = OpenCC::new(Path::join(&output_path, DefaultConfig::TW2SP)).unwrap(); 72 | 73 | assert_eq!("凉风有讯", &opencc.convert("涼風有訊")); 74 | ``` 75 | 76 | ## Supported Platforms 77 | 78 | This crate currently supports **Linux**. Other platforms are not guaranteed to work. 
79 | 80 | ## Crates.io 81 | 82 | https://crates.io/crates/opencc-rust 83 | 84 | ## Documentation 85 | 86 | https://docs.rs/opencc-rust 87 | 88 | ## License 89 | 90 | [Apache-2.0](LICENSE) -------------------------------------------------------------------------------- /build.rs: -------------------------------------------------------------------------------- 1 | use std::{collections::HashSet, env, path::PathBuf, process::Command}; 2 | 3 | const MIN_VERSION: &str = "1.1.2"; 4 | const MAX_VERSION: &str = "1.2.0"; 5 | 6 | fn main() { 7 | if env::var("DOCS_RS").is_ok() { 8 | return; 9 | } 10 | 11 | if cfg!(target_os = "freebsd") { 12 | env_var_set_default("OPENCC_INCLUDE_DIRS", "/usr/include/opencc"); 13 | env_var_set_default("OPENCC_LIB_DIRS", "/usr/lib"); 14 | env_var_set_default("OPENCC_LIBS", "opencc"); 15 | } 16 | 17 | let lib_dirs = find_opencc_lib_dirs(); 18 | 19 | for d in &lib_dirs { 20 | if !d.exists() { 21 | panic!("OpenCC library directory does not exist: {}", d.to_string_lossy()); 22 | } 23 | println!("cargo:rustc-link-search=native={}", d.to_string_lossy()); 24 | } 25 | 26 | let include_dirs = find_opencc_include_dirs(); 27 | for d in &include_dirs { 28 | if !d.exists() { 29 | panic!("OpenCC include directory does not exist: {}", d.to_string_lossy()); 30 | } 31 | println!("cargo:include={}", d.to_string_lossy()); 32 | } 33 | println!("cargo:rerun-if-env-changed=OPENCC_LIBS"); 34 | 35 | let target = env::var("TARGET").unwrap(); 36 | let libs_env = env::var("OPENCC_LIBS").ok(); 37 | 38 | let libs = match libs_env { 39 | Some(ref v) => v.split(':').map(|x| x.to_owned()).collect(), 40 | None => { 41 | #[allow(clippy::if_same_then_else)] 42 | if target.contains("windows") { 43 | vec!["opencc".to_string()] // TODO: not sure 44 | } else if target.contains("freebsd") { 45 | vec!["opencc".to_string()] 46 | } else { 47 | run_pkg_config().libs 48 | } 49 | }, 50 | }; 51 | 52 | let kind = determine_mode(&lib_dirs, libs.as_slice()); 53 | for lib in libs.into_iter() { 
54 | println!("cargo:rustc-link-lib={}={}", kind, lib); 55 | } 56 | 57 | println!("cargo:rerun-if-env-changed=OPENCC_DYLIB_STDCPP"); 58 | let kind = env::var("OPENCC_DYLIB_STDCPP"); 59 | match kind.as_ref().map(|s| &s[..]) { 60 | Ok("0") => (), 61 | Ok(_) => println!("cargo:rustc-link-lib=dylib=stdc++"), 62 | Err(_) => (), 63 | } 64 | 65 | println!("cargo:rerun-if-env-changed=OPENCC_STATIC_STDCPP"); 66 | let kind = env::var("OPENCC_STATIC_STDCPP"); 67 | match kind.as_ref().map(|s| &s[..]) { 68 | Ok("0") => (), 69 | Ok(_) => println!("cargo:rustc-link-lib=static=stdc++"), 70 | Err(_) => (), 71 | } 72 | } 73 | 74 | fn env_var_set_default(name: &str, value: &str) { 75 | if env::var(name).is_err() { 76 | env::set_var(name, value); 77 | } 78 | } 79 | 80 | fn find_opencc_lib_dirs() -> Vec { 81 | println!("cargo:rerun-if-env-changed=OPENCC_LIB_DIRS"); 82 | 83 | let sep = if cfg!(target_os = "windows") { ";" } else { ":" }; 84 | 85 | env::var("OPENCC_LIB_DIRS") 86 | .map(|x| x.split(sep).map(PathBuf::from).collect::>()) 87 | .or_else(|_| Ok(vec![find_opencc_dir()?.join("lib")])) 88 | .or_else(|_: env::VarError| -> Result<_, env::VarError> { Ok(run_pkg_config().link_paths) }) 89 | .expect("Couldn't find OpenCC library directory") 90 | } 91 | 92 | fn find_opencc_include_dirs() -> Vec { 93 | println!("cargo:rerun-if-env-changed=OPENCC_INCLUDE_DIRS"); 94 | 95 | let sep = if cfg!(target_os = "windows") { ";" } else { ":" }; 96 | 97 | env::var("OPENCC_INCLUDE_DIRS") 98 | .map(|x| x.split(sep).map(PathBuf::from).collect::>()) 99 | .or_else(|_| Ok(vec![find_opencc_dir()?.join("include")])) 100 | .or_else(|_: env::VarError| -> Result<_, env::VarError> { 101 | Ok(run_pkg_config().include_paths) 102 | }) 103 | .expect("Couldn't find OpenCC include directory") 104 | } 105 | 106 | fn find_opencc_dir() -> Result { 107 | println!("cargo:rerun-if-env-changed=OPENCC_DIR"); 108 | env::var("OPENCC_DIR").map(PathBuf::from) 109 | } 110 | 111 | fn determine_mode>(libdirs: &[PathBuf], libs: 
&[T]) -> &'static str { 112 | println!("cargo:rerun-if-env-changed=OPENCC_STATIC"); 113 | let kind = env::var("OPENCC_STATIC").ok(); 114 | match kind.as_ref().map(|s| &s[..]) { 115 | Some("0") => return "dylib", 116 | Some(_) => return "static", 117 | None => {}, 118 | } 119 | 120 | let files = libdirs 121 | .iter() 122 | .flat_map(|d| d.read_dir().unwrap()) 123 | .map(|e| e.unwrap()) 124 | .map(|e| e.file_name()) 125 | .filter_map(|e| e.into_string().ok()) 126 | .collect::>(); 127 | let can_static = libs.iter().all(|l| { 128 | files.contains(&format!("lib{}.a", l.as_ref())) 129 | || files.contains(&format!("{}.lib", l.as_ref())) 130 | }); 131 | let can_dylib = libs.iter().all(|l| { 132 | files.contains(&format!("lib{}.so", l.as_ref())) 133 | || files.contains(&format!("{}.dll", l.as_ref())) 134 | || files.contains(&format!("lib{}.dylib", l.as_ref())) 135 | }); 136 | 137 | match (can_static, can_dylib) { 138 | (true, false) => return "static", 139 | (false, true) => return "dylib", 140 | (false, false) => { 141 | panic!( 142 | "OpenCC libdirs at `{:?}` do not contain the required files to either statically \ 143 | or dynamically link OpenCC", 144 | libdirs 145 | ); 146 | }, 147 | (true, true) => {}, 148 | } 149 | 150 | "dylib" 151 | } 152 | 153 | fn run_pkg_config() -> pkg_config::Library { 154 | pkg_config::Config::new() 155 | .cargo_metadata(false) 156 | .atleast_version(MIN_VERSION) 157 | .probe("opencc") 158 | .unwrap(); 159 | 160 | if !Command::new("pkg-config") 161 | .arg(format!("--max-version={}", MAX_VERSION)) 162 | .arg("opencc") 163 | .status() 164 | .unwrap() 165 | .success() 166 | { 167 | panic!("OpenCC version must be no higher than {}", MAX_VERSION); 168 | } 169 | 170 | pkg_config::Config::new().cargo_metadata(false).probe("opencc").unwrap() 171 | } 172 | -------------------------------------------------------------------------------- /opencc/HKVariants.ocd2: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/magiclen/opencc-rust/4046f0683ddc9a7a82f4ce7a65889b65442ff3fe/opencc/HKVariants.ocd2 -------------------------------------------------------------------------------- /opencc/HKVariantsRev.ocd2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/magiclen/opencc-rust/4046f0683ddc9a7a82f4ce7a65889b65442ff3fe/opencc/HKVariantsRev.ocd2 -------------------------------------------------------------------------------- /opencc/HKVariantsRevPhrases.ocd2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/magiclen/opencc-rust/4046f0683ddc9a7a82f4ce7a65889b65442ff3fe/opencc/HKVariantsRevPhrases.ocd2 -------------------------------------------------------------------------------- /opencc/JPShinjitaiCharacters.ocd2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/magiclen/opencc-rust/4046f0683ddc9a7a82f4ce7a65889b65442ff3fe/opencc/JPShinjitaiCharacters.ocd2 -------------------------------------------------------------------------------- /opencc/JPShinjitaiPhrases.ocd2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/magiclen/opencc-rust/4046f0683ddc9a7a82f4ce7a65889b65442ff3fe/opencc/JPShinjitaiPhrases.ocd2 -------------------------------------------------------------------------------- /opencc/JPVariants.ocd2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/magiclen/opencc-rust/4046f0683ddc9a7a82f4ce7a65889b65442ff3fe/opencc/JPVariants.ocd2 -------------------------------------------------------------------------------- /opencc/JPVariantsRev.ocd2: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/magiclen/opencc-rust/4046f0683ddc9a7a82f4ce7a65889b65442ff3fe/opencc/JPVariantsRev.ocd2 -------------------------------------------------------------------------------- /opencc/STCharacters.ocd2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/magiclen/opencc-rust/4046f0683ddc9a7a82f4ce7a65889b65442ff3fe/opencc/STCharacters.ocd2 -------------------------------------------------------------------------------- /opencc/STPhrases.ocd2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/magiclen/opencc-rust/4046f0683ddc9a7a82f4ce7a65889b65442ff3fe/opencc/STPhrases.ocd2 -------------------------------------------------------------------------------- /opencc/TSCharacters.ocd2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/magiclen/opencc-rust/4046f0683ddc9a7a82f4ce7a65889b65442ff3fe/opencc/TSCharacters.ocd2 -------------------------------------------------------------------------------- /opencc/TSPhrases.ocd2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/magiclen/opencc-rust/4046f0683ddc9a7a82f4ce7a65889b65442ff3fe/opencc/TSPhrases.ocd2 -------------------------------------------------------------------------------- /opencc/TWPhrases.ocd2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/magiclen/opencc-rust/4046f0683ddc9a7a82f4ce7a65889b65442ff3fe/opencc/TWPhrases.ocd2 -------------------------------------------------------------------------------- /opencc/TWPhrasesRev.ocd2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/magiclen/opencc-rust/4046f0683ddc9a7a82f4ce7a65889b65442ff3fe/opencc/TWPhrasesRev.ocd2 
-------------------------------------------------------------------------------- /opencc/TWVariants.ocd2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/magiclen/opencc-rust/4046f0683ddc9a7a82f4ce7a65889b65442ff3fe/opencc/TWVariants.ocd2 -------------------------------------------------------------------------------- /opencc/TWVariantsRev.ocd2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/magiclen/opencc-rust/4046f0683ddc9a7a82f4ce7a65889b65442ff3fe/opencc/TWVariantsRev.ocd2 -------------------------------------------------------------------------------- /opencc/TWVariantsRevPhrases.ocd2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/magiclen/opencc-rust/4046f0683ddc9a7a82f4ce7a65889b65442ff3fe/opencc/TWVariantsRevPhrases.ocd2 -------------------------------------------------------------------------------- /opencc/hk2s.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Traditional Chinese (Hong Kong variant) to Simplified Chinese", 3 | "segmentation": { 4 | "type": "mmseg", 5 | "dict": { 6 | "type": "ocd2", 7 | "file": "TSPhrases.ocd2" 8 | } 9 | }, 10 | "conversion_chain": [{ 11 | "dict": { 12 | "type": "group", 13 | "dicts": [{ 14 | "type": "ocd2", 15 | "file": "HKVariantsRevPhrases.ocd2" 16 | }, { 17 | "type": "ocd2", 18 | "file": "HKVariantsRev.ocd2" 19 | }] 20 | } 21 | }, { 22 | "dict": { 23 | "type": "group", 24 | "dicts": [{ 25 | "type": "ocd2", 26 | "file": "TSPhrases.ocd2" 27 | }, { 28 | "type": "ocd2", 29 | "file": "TSCharacters.ocd2" 30 | }] 31 | } 32 | }] 33 | } 34 | -------------------------------------------------------------------------------- /opencc/hk2t.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Traditional Chinese (Hong Kong 
variant) to Traditional Chinese", 3 | "segmentation": { 4 | "type": "mmseg", 5 | "dict": { 6 | "type": "ocd2", 7 | "file": "HKVariantsRevPhrases.ocd2" 8 | } 9 | }, 10 | "conversion_chain": [{ 11 | "dict": { 12 | "type": "group", 13 | "dicts": [{ 14 | "type": "ocd2", 15 | "file": "HKVariantsRevPhrases.ocd2" 16 | }, { 17 | "type": "ocd2", 18 | "file": "HKVariantsRev.ocd2" 19 | }] 20 | } 21 | }] 22 | } 23 | -------------------------------------------------------------------------------- /opencc/jp2t.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "New Japanese Kanji (Shinjitai) to Traditional Chinese Characters (Kyūjitai)", 3 | "segmentation": { 4 | "type": "mmseg", 5 | "dict": { 6 | "type": "ocd2", 7 | "file": "JPShinjitaiPhrases.ocd2" 8 | } 9 | }, 10 | "conversion_chain": [{ 11 | "dict": { 12 | "type": "group", 13 | "dicts": [{ 14 | "type": "ocd2", 15 | "file": "JPShinjitaiPhrases.ocd2" 16 | }, { 17 | "type": "ocd2", 18 | "file": "JPShinjitaiCharacters.ocd2" 19 | }, { 20 | "type": "ocd2", 21 | "file": "JPVariantsRev.ocd2" 22 | }] 23 | } 24 | }] 25 | } 26 | -------------------------------------------------------------------------------- /opencc/s2hk.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Simplified Chinese to Traditional Chinese (Hong Kong variant)", 3 | "segmentation": { 4 | "type": "mmseg", 5 | "dict": { 6 | "type": "ocd2", 7 | "file": "STPhrases.ocd2" 8 | } 9 | }, 10 | "conversion_chain": [{ 11 | "dict": { 12 | "type": "group", 13 | "dicts": [{ 14 | "type": "ocd2", 15 | "file": "STPhrases.ocd2" 16 | }, { 17 | "type": "ocd2", 18 | "file": "STCharacters.ocd2" 19 | }] 20 | } 21 | }, { 22 | "dict": { 23 | "type": "ocd2", 24 | "file": "HKVariants.ocd2" 25 | } 26 | }] 27 | } 28 | -------------------------------------------------------------------------------- /opencc/s2t.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "name": "Simplified Chinese to Traditional Chinese", 3 | "segmentation": { 4 | "type": "mmseg", 5 | "dict": { 6 | "type": "ocd2", 7 | "file": "STPhrases.ocd2" 8 | } 9 | }, 10 | "conversion_chain": [{ 11 | "dict": { 12 | "type": "group", 13 | "dicts": [{ 14 | "type": "ocd2", 15 | "file": "STPhrases.ocd2" 16 | }, { 17 | "type": "ocd2", 18 | "file": "STCharacters.ocd2" 19 | }] 20 | } 21 | }] 22 | } 23 | -------------------------------------------------------------------------------- /opencc/s2tw.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Simplified Chinese to Traditional Chinese (Taiwan standard)", 3 | "segmentation": { 4 | "type": "mmseg", 5 | "dict": { 6 | "type": "ocd2", 7 | "file": "STPhrases.ocd2" 8 | } 9 | }, 10 | "conversion_chain": [{ 11 | "dict": { 12 | "type": "group", 13 | "dicts": [{ 14 | "type": "ocd2", 15 | "file": "STPhrases.ocd2" 16 | }, { 17 | "type": "ocd2", 18 | "file": "STCharacters.ocd2" 19 | }] 20 | } 21 | }, { 22 | "dict": { 23 | "type": "ocd2", 24 | "file": "TWVariants.ocd2" 25 | } 26 | }] 27 | } 28 | -------------------------------------------------------------------------------- /opencc/s2twp.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Simplified Chinese to Traditional Chinese (Taiwan standard, with phrases)", 3 | "segmentation": { 4 | "type": "mmseg", 5 | "dict": { 6 | "type": "ocd2", 7 | "file": "STPhrases.ocd2" 8 | } 9 | }, 10 | "conversion_chain": [{ 11 | "dict": { 12 | "type": "group", 13 | "dicts": [{ 14 | "type": "ocd2", 15 | "file": "STPhrases.ocd2" 16 | }, { 17 | "type": "ocd2", 18 | "file": "STCharacters.ocd2" 19 | }] 20 | } 21 | }, { 22 | "dict": { 23 | "type": "ocd2", 24 | "file": "TWPhrases.ocd2" 25 | } 26 | }, { 27 | "dict": { 28 | "type": "ocd2", 29 | "file": "TWVariants.ocd2" 30 | } 31 | }] 32 | } 33 | 
-------------------------------------------------------------------------------- /opencc/t2hk.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Traditional Chinese to Traditional Chinese (Hong Kong variant)", 3 | "segmentation": { 4 | "type": "mmseg", 5 | "dict": { 6 | "type": "ocd2", 7 | "file": "HKVariants.ocd2" 8 | } 9 | }, 10 | "conversion_chain": [{ 11 | "dict": { 12 | "type": "ocd2", 13 | "file": "HKVariants.ocd2" 14 | } 15 | }] 16 | } 17 | -------------------------------------------------------------------------------- /opencc/t2jp.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Traditional Chinese Characters (Kyūjitai) to New Japanese Kanji (Shinjitai)", 3 | "segmentation": { 4 | "type": "mmseg", 5 | "dict": { 6 | "type": "ocd2", 7 | "file": "JPVariants.ocd2" 8 | } 9 | }, 10 | "conversion_chain": [{ 11 | "dict": { 12 | "type": "ocd2", 13 | "file": "JPVariants.ocd2" 14 | } 15 | }] 16 | } 17 | -------------------------------------------------------------------------------- /opencc/t2s.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Traditional Chinese to Simplified Chinese", 3 | "segmentation": { 4 | "type": "mmseg", 5 | "dict": { 6 | "type": "ocd2", 7 | "file": "TSPhrases.ocd2" 8 | } 9 | }, 10 | "conversion_chain": [{ 11 | "dict": { 12 | "type": "group", 13 | "dicts": [{ 14 | "type": "ocd2", 15 | "file": "TSPhrases.ocd2" 16 | }, { 17 | "type": "ocd2", 18 | "file": "TSCharacters.ocd2" 19 | }] 20 | } 21 | }] 22 | } 23 | -------------------------------------------------------------------------------- /opencc/t2tw.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Traditional Chinese to Traditional Chinese (Taiwan standard)", 3 | "segmentation": { 4 | "type": "mmseg", 5 | "dict": { 6 | "type": "ocd2", 7 | "file": "TWVariants.ocd2" 8 | } 9 
| }, 10 | "conversion_chain": [{ 11 | "dict": { 12 | "type": "ocd2", 13 | "file": "TWVariants.ocd2" 14 | } 15 | }] 16 | } 17 | -------------------------------------------------------------------------------- /opencc/tw2s.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Traditional Chinese (Taiwan standard) to Simplified Chinese", 3 | "segmentation": { 4 | "type": "mmseg", 5 | "dict": { 6 | "type": "ocd2", 7 | "file": "TSPhrases.ocd2" 8 | } 9 | }, 10 | "conversion_chain": [{ 11 | "dict": { 12 | "type": "group", 13 | "dicts": [{ 14 | "type": "ocd2", 15 | "file": "TWVariantsRevPhrases.ocd2" 16 | }, { 17 | "type": "ocd2", 18 | "file": "TWVariantsRev.ocd2" 19 | }] 20 | } 21 | }, { 22 | "dict": { 23 | "type": "group", 24 | "dicts": [{ 25 | "type": "ocd2", 26 | "file": "TSPhrases.ocd2" 27 | }, { 28 | "type": "ocd2", 29 | "file": "TSCharacters.ocd2" 30 | }] 31 | } 32 | }] 33 | } 34 | -------------------------------------------------------------------------------- /opencc/tw2sp.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Traditional Chinese (Taiwan standard) to Simplified Chinese (with phrases)", 3 | "segmentation": { 4 | "type": "mmseg", 5 | "dict": { 6 | "type": "ocd2", 7 | "file": "TSPhrases.ocd2" 8 | } 9 | }, 10 | "conversion_chain": [{ 11 | "dict": { 12 | "type": "group", 13 | "dicts": [{ 14 | "type": "ocd2", 15 | "file": "TWPhrasesRev.ocd2" 16 | }, { 17 | "type": "ocd2", 18 | "file": "TWVariantsRevPhrases.ocd2" 19 | }, { 20 | "type": "ocd2", 21 | "file": "TWVariantsRev.ocd2" 22 | }] 23 | } 24 | }, { 25 | "dict": { 26 | "type": "group", 27 | "dicts": [{ 28 | "type": "ocd2", 29 | "file": "TSPhrases.ocd2" 30 | }, { 31 | "type": "ocd2", 32 | "file": "TSCharacters.ocd2" 33 | }] 34 | } 35 | }] 36 | } 37 | -------------------------------------------------------------------------------- /opencc/tw2t.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "name": "Traditional Chinese (Taiwan standard) to Traditional Chinese", 3 | "segmentation": { 4 | "type": "mmseg", 5 | "dict": { 6 | "type": "ocd2", 7 | "file": "TWVariantsRevPhrases.ocd2" 8 | } 9 | }, 10 | "conversion_chain": [{ 11 | "dict": { 12 | "type": "group", 13 | "dicts": [{ 14 | "type": "ocd2", 15 | "file": "TWVariantsRevPhrases.ocd2" 16 | }, { 17 | "type": "ocd2", 18 | "file": "TWVariantsRev.ocd2" 19 | }] 20 | } 21 | }] 22 | } 23 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | # array_width = 60 2 | # attr_fn_like_width = 70 3 | binop_separator = "Front" 4 | blank_lines_lower_bound = 0 5 | blank_lines_upper_bound = 1 6 | brace_style = "PreferSameLine" 7 | # chain_width = 60 8 | color = "Auto" 9 | # comment_width = 100 10 | condense_wildcard_suffixes = true 11 | control_brace_style = "AlwaysSameLine" 12 | empty_item_single_line = true 13 | enum_discrim_align_threshold = 80 14 | error_on_line_overflow = false 15 | error_on_unformatted = false 16 | # fn_call_width = 60 17 | fn_params_layout = "Tall" 18 | fn_single_line = false 19 | force_explicit_abi = true 20 | force_multiline_blocks = false 21 | format_code_in_doc_comments = true 22 | doc_comment_code_block_width = 80 23 | format_generated_files = true 24 | format_macro_matchers = true 25 | format_macro_bodies = true 26 | skip_macro_invocations = [] 27 | format_strings = true 28 | hard_tabs = false 29 | hex_literal_case = "Upper" 30 | imports_indent = "Block" 31 | imports_layout = "Mixed" 32 | indent_style = "Block" 33 | inline_attribute_width = 0 34 | match_arm_blocks = true 35 | match_arm_leading_pipes = "Never" 36 | match_block_trailing_comma = true 37 | max_width = 100 38 | merge_derives = true 39 | imports_granularity = "Crate" 40 | newline_style = "Unix" 41 | normalize_comments 
= false 42 | normalize_doc_attributes = true 43 | overflow_delimited_expr = true 44 | remove_nested_parens = true 45 | reorder_impl_items = true 46 | reorder_imports = true 47 | group_imports = "StdExternalCrate" 48 | reorder_modules = true 49 | short_array_element_width_threshold = 10 50 | # single_line_if_else_max_width = 50 51 | space_after_colon = true 52 | space_before_colon = false 53 | spaces_around_ranges = false 54 | struct_field_align_threshold = 80 55 | struct_lit_single_line = false 56 | # struct_lit_width = 18 57 | # struct_variant_width = 35 58 | tab_spaces = 4 59 | trailing_comma = "Vertical" 60 | trailing_semicolon = true 61 | type_punctuation_density = "Wide" 62 | use_field_init_shorthand = true 63 | use_small_heuristics = "Max" 64 | use_try_shorthand = true 65 | where_single_line = false 66 | wrap_comments = false -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | /*! 2 | Open Chinese Convert (OpenCC, 開放中文轉換) binding for the Rust language for conversion between Traditional Chinese and Simplified Chinese. 3 | 4 | ## Compilation 5 | 6 | To compile this crate, you need to compile the OpenCC C++ library first. You can install OpenCC in your operating system, or somewhere else in your file system. As for the latter, you need to set the following environment variables to link the OpenCC library: 7 | 8 | * `OPENCC_LIB_DIRS`: The directories of library files, like `-L`. Use `:` to separate. 9 | * `OPENCC_LIBS`: The library names that you want to link, like `-l`. Use `:` to separate. Typically, it contains **opencc:marisa**. 10 | * `OPENCC_INCLUDE_DIRS`: The directories of header files, like `-I`. Use `:` to separate. 11 | * `OPENCC_STATIC`: Whether to use `static` or `dylib`. 12 | * `OPENCC_DYLIB_STDCPP`: If you use `static` linking, and your OpenCC library is compiled by GCC (the GNU C++ compiler), this environment variable should be set.
13 | 14 | ## Examples 15 | 16 | ```rust 17 | use opencc_rust::*; 18 | 19 | let opencc = OpenCC::new(DefaultConfig::TW2SP).unwrap(); 20 | 21 | let s = opencc.convert("涼風有訊"); 22 | 23 | assert_eq!("凉风有讯", &s); 24 | 25 | let s = opencc.convert_to_buffer(",秋月無邊", s); 26 | 27 | assert_eq!("凉风有讯,秋月无边", &s); 28 | ``` 29 | 30 | ```rust 31 | use opencc_rust::*; 32 | 33 | let opencc = OpenCC::new(DefaultConfig::S2TWP).unwrap(); 34 | 35 | let s = opencc.convert("凉风有讯"); 36 | 37 | assert_eq!("涼風有訊", &s); 38 | 39 | let s = opencc.convert_to_buffer(",秋月无边", s); 40 | 41 | assert_eq!("涼風有訊,秋月無邊", &s); 42 | ``` 43 | 44 | ## Static Dictionaries 45 | 46 | Usually, OpenCC needs to be executed on an environment where OpenCC is installed. If you want to make it portable, you can enable the `static-dictionaries` feature. 47 | 48 | ```toml 49 | [dependencies.opencc-rust] 50 | version = "*" 51 | features = ["static-dictionaries"] 52 | ``` 53 | Then, the `generate_static_dictionary` and `generate_static_dictionaries` functions are available. 54 | 55 | The default OpenCC dictionaries will be compiled into the binary file by `lazy_static_include` crate. And you can use the two functions to recover them on demand. 

For example,

```rust,ignore
use std::path::Path;

use opencc_rust::*;

let output_path = Path::new("/path/to/dictionaries-directory");

generate_static_dictionary(output_path, DefaultConfig::TW2SP).unwrap();

let opencc = OpenCC::new(output_path.join(DefaultConfig::TW2SP)).unwrap();

assert_eq!("凉风有讯", &opencc.convert("涼風有訊"));
```
*/

#[cfg(feature = "static-dictionaries")]
#[macro_use]
extern crate lazy_static;

#[cfg(feature = "static-dictionaries")]
#[macro_use]
extern crate lazy_static_include;

#[cfg(feature = "static-dictionaries")]
use std::fs::{self, File};
#[cfg(feature = "static-dictionaries")]
use std::io::Write;
use std::{
    ffi::{CStr, CString},
    path::Path,
};

use libc::{c_char, c_int, c_void, size_t};

// Raw declarations of the OpenCC C API. The safe wrapper around them is `OpenCC`.
// NOTE(review): the per-function contracts below are stated from how this file uses the
// functions; confirm the details against OpenCC's `opencc.h`.
#[link(name = "opencc")]
extern "C" {
    /// Opens a converter from the NUL-terminated path of a config file and returns its handle.
    /// `OpenCC::new` treats a handle with all bits set as failure.
    pub fn opencc_open(config_file_path: *const c_char) -> *mut c_void;
    /// Closes a handle obtained from `opencc_open`.
    pub fn opencc_close(opencc: *mut c_void) -> c_int;
    /// Converts `length` bytes of `input` and returns a newly allocated C string which has to be
    /// released with `opencc_convert_utf8_free`.
    pub fn opencc_convert_utf8(
        opencc: *mut c_void,
        input: *const c_char,
        length: size_t,
    ) -> *mut c_char;
    /// Converts `length` bytes of `input` into the caller-allocated `output` buffer and returns
    /// the size of the result. The API takes no capacity for `output`, so the caller is
    /// responsible for making the buffer large enough.
    pub fn opencc_convert_utf8_to_buffer(
        opencc: *mut c_void,
        input: *const c_char,
        length: size_t,
        output: *mut c_char,
    ) -> size_t;
    /// Releases a string returned by `opencc_convert_utf8`.
    pub fn opencc_convert_utf8_free(str: *mut c_char);
    /// Presumably returns a description of the last error; confirm against `opencc.h`.
    pub fn opencc_error() -> *const c_char;
}

/// A file embedded into the binary: its file name (`.0`) and its contents (`.1`).
#[cfg(feature = "static-dictionaries")]
struct SD(&'static str, &'static [u8]);

/// Declares a lazily initialized static `$name` of type `SD` whose contents are the bytes of
/// the file `opencc/$file_name`.
#[cfg(feature = "static-dictionaries")]
macro_rules! new_sd_instance {
    ($name:ident, $file_name:expr) => {
        lazy_static! {
            static ref $name: SD = {
                lazy_static_include_bytes! {
                    RES => ("opencc", $file_name)
                }

                SD($file_name, &RES)
            };
        }
    };
}

// One embedded file per OpenCC config (`*.json`) and dictionary (`*.ocd2`).
#[cfg(feature = "static-dictionaries")]
new_sd_instance!(HK2S_JSON, "hk2s.json");
#[cfg(feature = "static-dictionaries")]
new_sd_instance!(HK2T_JSON, "hk2t.json");
#[cfg(feature = "static-dictionaries")]
new_sd_instance!(HKVARIANTS_OCD, "HKVariants.ocd2");
#[cfg(feature = "static-dictionaries")]
new_sd_instance!(HKVARIANTS_REV_OCD, "HKVariantsRev.ocd2");
#[cfg(feature = "static-dictionaries")]
new_sd_instance!(HKVARIANTS_REV_PHRASES_OCD, "HKVariantsRevPhrases.ocd2");
#[cfg(feature = "static-dictionaries")]
new_sd_instance!(JP2T_JSON, "jp2t.json");
// NOTE(review): `CHARATERS` is a misspelling of `CHARACTERS`; the name is kept as-is because
// the dictionary generator refers to this static by this exact name.
#[cfg(feature = "static-dictionaries")]
new_sd_instance!(JPSHINJITAI_CHARATERS_OCD, "JPShinjitaiCharacters.ocd2");
#[cfg(feature = "static-dictionaries")]
new_sd_instance!(JPSHINJITAI_PHRASES_OCD, "JPShinjitaiPhrases.ocd2");
#[cfg(feature = "static-dictionaries")]
new_sd_instance!(JPVARIANTS_OCD, "JPVariants.ocd2");
#[cfg(feature = "static-dictionaries")]
new_sd_instance!(JPVARIANTS_REV_OCD, "JPVariantsRev.ocd2");
#[cfg(feature = "static-dictionaries")]
new_sd_instance!(S2HK_JSON, "s2hk.json");
#[cfg(feature = "static-dictionaries")]
new_sd_instance!(S2T_JSON, "s2t.json");
#[cfg(feature = "static-dictionaries")]
new_sd_instance!(S2TW_JSON, "s2tw.json");
#[cfg(feature = "static-dictionaries")]
new_sd_instance!(S2TWP_JSON, "s2twp.json");
#[cfg(feature = "static-dictionaries")]
new_sd_instance!(STCHARACTERS_OCD, "STCharacters.ocd2");
#[cfg(feature = "static-dictionaries")]
new_sd_instance!(STPHRASES_OCD, "STPhrases.ocd2");
#[cfg(feature = "static-dictionaries")]
new_sd_instance!(T2HK_JSON, "t2hk.json");
#[cfg(feature = "static-dictionaries")]
new_sd_instance!(T2JP_JSON, "t2jp.json");
#[cfg(feature = "static-dictionaries")]
new_sd_instance!(T2S_JSON, "t2s.json");
#[cfg(feature = "static-dictionaries")]
new_sd_instance!(T2TW_JSON, "t2tw.json");
#[cfg(feature = "static-dictionaries")]
new_sd_instance!(TSCHARACTERS_OCD, "TSCharacters.ocd2");
#[cfg(feature = "static-dictionaries")]
new_sd_instance!(TSPHRASES_OCD, "TSPhrases.ocd2");
#[cfg(feature = "static-dictionaries")]
new_sd_instance!(TW2S_JSON, "tw2s.json");
#[cfg(feature = "static-dictionaries")]
new_sd_instance!(TW2SP_JSON, "tw2sp.json");
#[cfg(feature = "static-dictionaries")]
new_sd_instance!(TW2T_JSON, "tw2t.json");
#[cfg(feature = "static-dictionaries")]
new_sd_instance!(TWPHRASES_OCD, "TWPhrases.ocd2");
#[cfg(feature = "static-dictionaries")]
new_sd_instance!(TWPHRASES_REV_OCD, "TWPhrasesRev.ocd2");
#[cfg(feature = "static-dictionaries")]
new_sd_instance!(TWVARIANTS_OCD, "TWVariants.ocd2");
#[cfg(feature = "static-dictionaries")]
new_sd_instance!(TWVARIANTS_REV_OCD, "TWVariantsRev.ocd2");
#[cfg(feature = "static-dictionaries")]
new_sd_instance!(TWVARIANTS_REV_PHRASES_OCD, "TWVariantsRevPhrases.ocd2");

/// Default configs.
190 | #[derive(Debug, Copy, Clone)] 191 | pub enum DefaultConfig { 192 | /// Traditional Chinese (Hong Kong Standard) to Simplified Chinese 193 | HK2S, 194 | /// Traditional Chinese (Hong Kong Standard) to Traditional Chinese 195 | HK2T, 196 | /// New Japanese Kanji (Shinjitai) to Traditional Chinese Characters (Kyūjitai) 197 | JP2T, 198 | /// Simplified Chinese to Traditional Chinese 199 | S2T, 200 | /// Simplified Chinese to Traditional Chinese (Taiwan Standard) 201 | S2TW, 202 | /// Simplified Chinese to Traditional Chinese (Taiwan Standard) with Taiwanese idiom 203 | S2TWP, 204 | /// Traditional Chinese (OpenCC Standard) to Hong Kong Standard 205 | T2HK, 206 | /// Traditional Chinese Characters (Kyūjitai) to New Japanese Kanji (Shinjitai) 207 | T2JP, 208 | /// Traditional Chinese (OpenCC Standard) to Taiwan Standard 209 | T2TW, 210 | /// Traditional Chinese to Simplified Chinese 211 | T2S, 212 | /// Simplified Chinese to Traditional Chinese (Hong Kong Standard) 213 | S2HK, 214 | /// Traditional Chinese (Taiwan Standard) to Simplified Chinese 215 | TW2S, 216 | /// Traditional Chinese (Taiwan Standard) to Simplified Chinese with Mainland Chinese idiom 217 | TW2SP, 218 | /// Traditional Chinese (Taiwan Standard) to Traditional Chinese 219 | TW2T, 220 | } 221 | 222 | impl DefaultConfig { 223 | /// Get the file name for this default config. 
224 | pub fn get_file_name(self) -> &'static str { 225 | match self { 226 | DefaultConfig::HK2S => "hk2s.json", 227 | DefaultConfig::HK2T => "hk2t.json", 228 | DefaultConfig::JP2T => "jp2t.json", 229 | DefaultConfig::S2HK => "s2hk.json", 230 | DefaultConfig::S2T => "s2t.json", 231 | DefaultConfig::S2TW => "s2tw.json", 232 | DefaultConfig::S2TWP => "s2twp.json", 233 | DefaultConfig::T2HK => "t2hk.json", 234 | DefaultConfig::T2JP => "t2jp.json", 235 | DefaultConfig::T2S => "t2s.json", 236 | DefaultConfig::T2TW => "t2tw.json", 237 | DefaultConfig::TW2S => "tw2s.json", 238 | DefaultConfig::TW2SP => "tw2sp.json", 239 | DefaultConfig::TW2T => "tw2t.json", 240 | } 241 | } 242 | } 243 | 244 | impl AsRef for DefaultConfig { 245 | fn as_ref(&self) -> &Path { 246 | Path::new(self.get_file_name()) 247 | } 248 | } 249 | 250 | impl AsRef for DefaultConfig { 251 | fn as_ref(&self) -> &str { 252 | self.get_file_name() 253 | } 254 | } 255 | 256 | /// OpenCC binding for Rust. 257 | pub struct OpenCC { 258 | opencc: *mut c_void, 259 | } 260 | 261 | unsafe impl Send for OpenCC {} 262 | 263 | unsafe impl Sync for OpenCC {} 264 | 265 | impl OpenCC { 266 | /// Create a new OpenCC instance through a file provided by its path. 267 | pub fn new>(config_file_path: P) -> Result { 268 | let config_file_path = 269 | CString::new(config_file_path.as_ref().as_os_str().to_str().unwrap()).unwrap(); 270 | 271 | let opencc = unsafe { opencc_open(config_file_path.as_ptr()) }; 272 | 273 | let v: size_t = opencc as size_t; 274 | if v == !0 { 275 | return Err("Cannot use this config file path."); 276 | } 277 | 278 | Ok(OpenCC { 279 | opencc, 280 | }) 281 | } 282 | 283 | /// Convert a string to another string. 
284 | pub fn convert>(&self, input: S) -> String { 285 | let input = input.as_ref(); 286 | 287 | let length = input.len(); 288 | let input = CString::new(input).unwrap(); 289 | 290 | let result_ptr = unsafe { opencc_convert_utf8(self.opencc, input.as_ptr(), length) }; 291 | let result_cstr = unsafe { CStr::from_ptr(result_ptr) }; 292 | let result = result_cstr.to_string_lossy().to_string(); 293 | 294 | unsafe { 295 | opencc_convert_utf8_free(result_ptr); 296 | } 297 | 298 | result 299 | } 300 | 301 | /// Convert a string to another string and store into a buffer. 302 | pub fn convert_to_buffer>(&self, input: S, output: String) -> String { 303 | let input = input.as_ref(); 304 | 305 | let length = input.len(); 306 | let input = CString::new(input).unwrap(); 307 | 308 | let mut output = output.into_bytes(); 309 | let o_len = output.len(); 310 | 311 | output.reserve(length * 2); 312 | 313 | let input_ptr = unsafe { output.as_ptr().add(output.len()) as *mut c_char }; 314 | 315 | let size = unsafe { 316 | opencc_convert_utf8_to_buffer(self.opencc, input.as_ptr(), length, input_ptr) 317 | }; 318 | 319 | unsafe { 320 | output.set_len(o_len + size); 321 | } 322 | 323 | unsafe { String::from_utf8_unchecked(output) } 324 | } 325 | } 326 | 327 | impl Drop for OpenCC { 328 | fn drop(&mut self) { 329 | if !self.opencc.is_null() { 330 | unsafe { 331 | opencc_close(self.opencc); 332 | } 333 | } 334 | } 335 | } 336 | 337 | #[cfg(feature = "static-dictionaries")] 338 | fn generate_static_dictionary_inner>( 339 | path: P, 340 | config: DefaultConfig, 341 | ) -> Result<(), &'static str> { 342 | let path = path.as_ref(); 343 | 344 | let mut output_data: Vec<&SD> = Vec::new(); 345 | 346 | match config { 347 | DefaultConfig::HK2S => { 348 | output_data.push(&HK2S_JSON); 349 | output_data.push(&TSPHRASES_OCD); 350 | output_data.push(&HKVARIANTS_REV_PHRASES_OCD); 351 | output_data.push(&HKVARIANTS_REV_OCD); 352 | output_data.push(&TSCHARACTERS_OCD); 353 | }, 354 | DefaultConfig::HK2T => { 
355 | output_data.push(&HK2T_JSON); 356 | output_data.push(&HKVARIANTS_REV_PHRASES_OCD); 357 | output_data.push(&HKVARIANTS_REV_OCD); 358 | }, 359 | DefaultConfig::JP2T => { 360 | output_data.push(&JP2T_JSON); 361 | output_data.push(&JPSHINJITAI_PHRASES_OCD); 362 | output_data.push(&JPSHINJITAI_CHARATERS_OCD); 363 | output_data.push(&JPVARIANTS_REV_OCD); 364 | }, 365 | DefaultConfig::S2HK => { 366 | output_data.push(&S2HK_JSON); 367 | output_data.push(&STPHRASES_OCD); 368 | output_data.push(&STCHARACTERS_OCD); 369 | output_data.push(&HKVARIANTS_OCD); 370 | }, 371 | DefaultConfig::S2T => { 372 | output_data.push(&S2T_JSON); 373 | output_data.push(&STPHRASES_OCD); 374 | output_data.push(&STCHARACTERS_OCD); 375 | }, 376 | DefaultConfig::S2TW => { 377 | output_data.push(&S2TW_JSON); 378 | output_data.push(&STPHRASES_OCD); 379 | output_data.push(&STCHARACTERS_OCD); 380 | output_data.push(&TWVARIANTS_OCD); 381 | }, 382 | DefaultConfig::S2TWP => { 383 | output_data.push(&S2TWP_JSON); 384 | output_data.push(&STPHRASES_OCD); 385 | output_data.push(&STCHARACTERS_OCD); 386 | output_data.push(&TWPHRASES_OCD); 387 | output_data.push(&TWVARIANTS_OCD); 388 | }, 389 | DefaultConfig::T2HK => { 390 | output_data.push(&T2HK_JSON); 391 | output_data.push(&HKVARIANTS_OCD); 392 | }, 393 | DefaultConfig::T2JP => { 394 | output_data.push(&T2JP_JSON); 395 | output_data.push(&JPVARIANTS_OCD); 396 | }, 397 | DefaultConfig::T2S => { 398 | output_data.push(&T2S_JSON); 399 | output_data.push(&TSPHRASES_OCD); 400 | output_data.push(&TSCHARACTERS_OCD); 401 | }, 402 | DefaultConfig::T2TW => { 403 | output_data.push(&T2TW_JSON); 404 | output_data.push(&TWVARIANTS_OCD); 405 | }, 406 | DefaultConfig::TW2S => { 407 | output_data.push(&TW2S_JSON); 408 | output_data.push(&TSPHRASES_OCD); 409 | output_data.push(&TWVARIANTS_REV_PHRASES_OCD); 410 | output_data.push(&TWVARIANTS_REV_OCD); 411 | output_data.push(&TSCHARACTERS_OCD); 412 | }, 413 | DefaultConfig::TW2SP => { 414 | output_data.push(&TW2SP_JSON); 
415 | output_data.push(&TSPHRASES_OCD); 416 | output_data.push(&TWPHRASES_REV_OCD); 417 | output_data.push(&TWVARIANTS_REV_PHRASES_OCD); 418 | output_data.push(&TWVARIANTS_REV_OCD); 419 | output_data.push(&TSCHARACTERS_OCD); 420 | }, 421 | DefaultConfig::TW2T => { 422 | output_data.push(&TW2T_JSON); 423 | output_data.push(&TWVARIANTS_REV_PHRASES_OCD); 424 | output_data.push(&TWVARIANTS_REV_OCD); 425 | }, 426 | } 427 | 428 | for data in output_data { 429 | let output_path = path.join(data.0); 430 | 431 | if output_path.exists() { 432 | if output_path.is_file() { 433 | continue; 434 | } else { 435 | return Err("The dictionary is not correct."); 436 | } 437 | } 438 | 439 | let mut file = File::create(output_path).map_err(|_| "Cannot create a new file.")?; 440 | 441 | file.write(data.1).map_err(|_| "Cannot write data to a file.")?; 442 | 443 | file.flush().map_err(|_| "Cannot flush file.")?; 444 | } 445 | 446 | Ok(()) 447 | } 448 | 449 | #[cfg(feature = "static-dictionaries")] 450 | /// Generate files for a specific dictionary. These files are used for opening a new OpenCC instance. 451 | pub fn generate_static_dictionary>( 452 | path: P, 453 | config: DefaultConfig, 454 | ) -> Result<(), &'static str> { 455 | let path = path.as_ref(); 456 | 457 | if path.exists() { 458 | if !path.is_dir() { 459 | return Err("The path of static dictionaries needs to be a directory."); 460 | } 461 | } else { 462 | match fs::create_dir_all(path) { 463 | Ok(_) => (), 464 | Err(_) => return Err("Cannot create new directories."), 465 | } 466 | } 467 | 468 | generate_static_dictionary_inner(path, config) 469 | } 470 | 471 | #[cfg(feature = "static-dictionaries")] 472 | /// Generate files for specific dictionaries. These files are used for opening a new OpenCC instance. 
473 | pub fn generate_static_dictionaries>( 474 | path: P, 475 | configs: &[DefaultConfig], 476 | ) -> Result<(), &'static str> { 477 | let path = path.as_ref(); 478 | 479 | if path.exists() { 480 | if !path.is_dir() { 481 | return Err("The path of static dictionaries needs to be a directory."); 482 | } 483 | } else { 484 | match fs::create_dir_all(path) { 485 | Ok(_) => (), 486 | Err(_) => return Err("Cannot create new directories."), 487 | } 488 | } 489 | 490 | for config in configs.iter().copied() { 491 | generate_static_dictionary_inner(path, config)? 492 | } 493 | 494 | Ok(()) 495 | } 496 | -------------------------------------------------------------------------------- /tests/opencc.rs: -------------------------------------------------------------------------------- 1 | use opencc_rust::{DefaultConfig, OpenCC}; 2 | 3 | #[test] 4 | fn tw2sp() { 5 | let opencc = OpenCC::new(DefaultConfig::TW2SP).unwrap(); 6 | assert_eq!( 7 | "凉风有讯,秋月无边,亏我思娇的情绪好比度日如年。虽然我不是玉树临风,潇洒倜傥,\ 8 | 但我有广阔的胸襟,加强劲的臂弯。", 9 | &opencc.convert( 10 | "涼風有訊,秋月無邊,虧我思嬌的情緒好比度日如年。雖然我不是玉樹臨風,瀟灑倜儻,\ 11 | 但我有廣闊的胸襟,加強勁的臂彎。" 12 | ) 13 | ); 14 | } 15 | 16 | #[test] 17 | fn tw2sp_to_buffer() { 18 | let s = String::from("涼風有訊,秋月無邊,虧我思嬌的情緒好比度日如年。"); 19 | 20 | let opencc = OpenCC::new(DefaultConfig::TW2SP).unwrap(); 21 | let s = opencc 22 | .convert_to_buffer("雖然我不是玉樹臨風,瀟灑倜儻,但我有廣闊的胸襟,加強勁的臂彎。", s); 23 | 24 | assert_eq!( 25 | "涼風有訊,秋月無邊,虧我思嬌的情緒好比度日如年。虽然我不是玉树临风,潇洒倜傥,\ 26 | 但我有广阔的胸襟,加强劲的臂弯。", 27 | &s 28 | ); 29 | } 30 | 31 | #[test] 32 | fn s2twp() { 33 | let opencc = OpenCC::new(DefaultConfig::S2TWP).unwrap(); 34 | assert_eq!( 35 | "涼風有訊,秋月無邊,虧我思嬌的情緒好比度日如年。雖然我不是玉樹臨風,瀟灑倜儻,\ 36 | 但我有廣闊的胸襟,加強勁的臂彎。", 37 | &opencc.convert( 38 | "凉风有讯,秋月无边,亏我思娇的情绪好比度日如年。虽然我不是玉树临风,潇洒倜傥,\ 39 | 但我有广阔的胸襟,加强劲的臂弯。" 40 | ) 41 | ); 42 | } 43 | 44 | #[test] 45 | fn s2twp_to_buffer() { 46 | let s = String::from("凉风有讯,秋月无边,亏我思娇的情绪好比度日如年。"); 47 | 48 | let opencc = OpenCC::new(DefaultConfig::S2TWP).unwrap(); 49 | let s = opencc 50 | 
.convert_to_buffer("虽然我不是玉树临风,潇洒倜傥,但我有广阔的胸襟,加强劲的臂弯。", s); 51 | 52 | assert_eq!( 53 | "凉风有讯,秋月无边,亏我思娇的情绪好比度日如年。雖然我不是玉樹臨風,瀟灑倜儻,\ 54 | 但我有廣闊的胸襟,加強勁的臂彎。", 55 | &s 56 | ); 57 | } 58 | -------------------------------------------------------------------------------- /tests/static_dictionaries.rs: -------------------------------------------------------------------------------- 1 | #![cfg(feature = "static-dictionaries")] 2 | 3 | use std::{env, path::Path}; 4 | 5 | use opencc_rust::{DefaultConfig, OpenCC}; 6 | 7 | #[test] 8 | fn generate_static_dictionary() { 9 | let cwd = env::current_dir().unwrap(); 10 | 11 | let output_path = Path::join(&cwd, "dict_output"); 12 | 13 | opencc_rust::generate_static_dictionary(&output_path, DefaultConfig::TW2SP).unwrap(); 14 | 15 | let s = String::from("無"); 16 | 17 | let opencc = OpenCC::new(Path::join(&output_path, DefaultConfig::TW2SP)).unwrap(); 18 | 19 | assert_eq!("无", &opencc.convert(s)); 20 | } 21 | --------------------------------------------------------------------------------