├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md └── src └── main.rs /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled files 2 | *.o 3 | *.so 4 | *.rlib 5 | *.dll 6 | 7 | # Executables 8 | *.exe 9 | 10 | # Generated by Cargo 11 | /target/ 12 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | [root] 2 | name = "l10n_lint" 3 | version = "0.1.0" 4 | dependencies = [ 5 | "docopt 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", 6 | "regex 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", 7 | "rustc-serialize 0.3.23 (registry+https://github.com/rust-lang/crates.io-index)", 8 | ] 9 | 10 | [[package]] 11 | name = "aho-corasick" 12 | version = "0.6.3" 13 | source = "registry+https://github.com/rust-lang/crates.io-index" 14 | dependencies = [ 15 | "memchr 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", 16 | ] 17 | 18 | [[package]] 19 | name = "docopt" 20 | version = "0.7.0" 21 | source = "registry+https://github.com/rust-lang/crates.io-index" 22 | dependencies = [ 23 | "lazy_static 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", 24 | "regex 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", 25 | "rustc-serialize 0.3.23 (registry+https://github.com/rust-lang/crates.io-index)", 26 | "strsim 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)", 27 | ] 28 | 29 | [[package]] 30 | name = "kernel32-sys" 31 | version = "0.2.2" 32 | source = "registry+https://github.com/rust-lang/crates.io-index" 33 | dependencies = [ 34 | "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", 35 | "winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", 36 | ] 37 | 38 | [[package]] 39 | name = "lazy_static" 40 | version = "0.2.6" 41 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 42 | 43 | [[package]] 44 | name = "libc" 45 | version = "0.2.21" 46 | source = "registry+https://github.com/rust-lang/crates.io-index" 47 | 48 | [[package]] 49 | name = "memchr" 50 | version = "1.0.1" 51 | source = "registry+https://github.com/rust-lang/crates.io-index" 52 | dependencies = [ 53 | "libc 0.2.21 (registry+https://github.com/rust-lang/crates.io-index)", 54 | ] 55 | 56 | [[package]] 57 | name = "regex" 58 | version = "0.2.1" 59 | source = "registry+https://github.com/rust-lang/crates.io-index" 60 | dependencies = [ 61 | "aho-corasick 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)", 62 | "memchr 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", 63 | "regex-syntax 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", 64 | "thread_local 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", 65 | "utf8-ranges 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", 66 | ] 67 | 68 | [[package]] 69 | name = "regex-syntax" 70 | version = "0.4.0" 71 | source = "registry+https://github.com/rust-lang/crates.io-index" 72 | 73 | [[package]] 74 | name = "rustc-serialize" 75 | version = "0.3.23" 76 | source = "registry+https://github.com/rust-lang/crates.io-index" 77 | 78 | [[package]] 79 | name = "strsim" 80 | version = "0.6.0" 81 | source = "registry+https://github.com/rust-lang/crates.io-index" 82 | 83 | [[package]] 84 | name = "thread-id" 85 | version = "3.0.0" 86 | source = "registry+https://github.com/rust-lang/crates.io-index" 87 | dependencies = [ 88 | "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", 89 | "libc 0.2.21 (registry+https://github.com/rust-lang/crates.io-index)", 90 | ] 91 | 92 | [[package]] 93 | name = "thread_local" 94 | version = "0.3.3" 95 | source = "registry+https://github.com/rust-lang/crates.io-index" 96 | dependencies = [ 97 | "thread-id 3.0.0 (registry+https://github.com/rust-lang/crates.io-index)", 
98 | "unreachable 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", 99 | ] 100 | 101 | [[package]] 102 | name = "unreachable" 103 | version = "0.1.1" 104 | source = "registry+https://github.com/rust-lang/crates.io-index" 105 | dependencies = [ 106 | "void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", 107 | ] 108 | 109 | [[package]] 110 | name = "utf8-ranges" 111 | version = "1.0.0" 112 | source = "registry+https://github.com/rust-lang/crates.io-index" 113 | 114 | [[package]] 115 | name = "void" 116 | version = "1.0.2" 117 | source = "registry+https://github.com/rust-lang/crates.io-index" 118 | 119 | [[package]] 120 | name = "winapi" 121 | version = "0.2.8" 122 | source = "registry+https://github.com/rust-lang/crates.io-index" 123 | 124 | [[package]] 125 | name = "winapi-build" 126 | version = "0.1.1" 127 | source = "registry+https://github.com/rust-lang/crates.io-index" 128 | 129 | [metadata] 130 | "checksum aho-corasick 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)" = "500909c4f87a9e52355b26626d890833e9e1d53ac566db76c36faa984b889699" 131 | "checksum docopt 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ab32ea6e284d87987066f21a9e809a73c14720571ef34516f0890b3d355ccfd8" 132 | "checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" 133 | "checksum lazy_static 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "2f61b8421c7a4648c391611625d56fdd5c7567da05af1be655fd8cacc643abb3" 134 | "checksum libc 0.2.21 (registry+https://github.com/rust-lang/crates.io-index)" = "88ee81885f9f04bff991e306fea7c1c60a5f0f9e409e99f6b40e3311a3363135" 135 | "checksum memchr 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "1dbccc0e46f1ea47b9f17e6d67c5a96bd27030519c519c9c91327e31275a47b4" 136 | "checksum regex 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = 
"4278c17d0f6d62dfef0ab00028feb45bd7d2102843f80763474eeb1be8a10c01" 137 | "checksum regex-syntax 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2f9191b1f57603095f105d317e375d19b1c9c5c3185ea9633a99a6dcbed04457" 138 | "checksum rustc-serialize 0.3.23 (registry+https://github.com/rust-lang/crates.io-index)" = "684ce48436d6465300c9ea783b6b14c4361d6b8dcbb1375b486a69cc19e2dfb0" 139 | "checksum strsim 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b4d15c810519a91cf877e7e36e63fe068815c678181439f2f29e2562147c3694" 140 | "checksum thread-id 3.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "4437c97558c70d129e40629a5b385b3fb1ffac301e63941335e4d354081ec14a" 141 | "checksum thread_local 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "c85048c6260d17cf486ceae3282d9fb6b90be220bf5b28c400f5485ffc29f0c7" 142 | "checksum unreachable 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "1f2ae5ddb18e1c92664717616dd9549dde73f539f01bd7b77c2edb2446bdff91" 143 | "checksum utf8-ranges 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "662fab6525a98beff2921d7f61a39e7d59e0b425ebc7d0d9e66d316e55124122" 144 | "checksum void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" 145 | "checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a" 146 | "checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc" 147 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "l10n_lint" 3 | version = "0.1.0" 4 | authors = ["Frederic Jacobs "] 5 | description = "iStringsCheck is an utility to verify 
completeness of .strings (iOS & OS X) translations files and the count of their format attributes." 6 | homepage = "https://github.com/FredericJacobs/iStringsCheck" 7 | repository = "https://github.com/FredericJacobs/iStringsCheck" 8 | readme = "README.md" 9 | keywords = [".strings", "strings", "OSX", "iOS", "translations"] 10 | license = "MIT" 11 | 12 | [dependencies] 13 | docopt = "*" 14 | rustc-serialize = "*" 15 | regex = "*" 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Frederic Jacobs 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # iStringsCheck 2 | 3 | iStringsCheck is a utility to verify completeness of .strings (iOS & OS X) translation files and the count of their format attributes. 4 | 5 | ## Motivation 6 | 7 | It is common to crowdsource translations using platforms like Transifex. Unfortunately, Transifex [does not verify that translations contain the same number of occurrences of a given formatter](https://twitter.com/transifex/status/601092349381357568). This is particularly an issue with Objective-C and C-related languages that do not provide any kind of memory safety guarantees on formatted strings. 8 | 9 | ## Usage 10 | ``` 11 | istringscheck <source> <translations> 12 | ``` 13 | ## Verifications 14 | 15 | Currently iStringsCheck only verifies that every localization file contains all of the localization keys of the source file and that, for a given key, the number of `%` format specifiers (such as `%@`) is the same as in the source language. 16 | 17 | ## About format string attacks 18 | If you are taking input from a user or other untrusted source and displaying it, you need to be careful that your display routines do not process format strings received from the untrusted source. For example, in the following code the syslog standard C library function is used to write a received HTTP request to the system log. Because the syslog function processes format strings, it will process any format strings included in the input packet: 19 | 20 | ``` 21 | /* receiving http packet */ 22 | int size = recv(fd, pktBuf, sizeof(pktBuf), 0); 23 | if (size) { 24 | syslog(LOG_INFO, "Received new HTTP request!"); 25 | syslog(LOG_INFO, pktBuf); 26 | } 27 | ``` 28 | Many format strings can cause problems for applications.
For example, suppose an attacker passes the following string in the input packet: 29 | 30 | ``` 31 | "AAAA%08x.%08x.%08x.%08x.%08x.%08x.%08x.%08x.%n" 32 | ``` 33 | 34 | This string retrieves eight items from the stack. Assuming that the format string itself is stored on the stack, depending on the structure of the stack, this might effectively move the stack pointer back to the beginning of the format string. Then the %n token would cause the print function to take the number of bytes written so far and write that value to the memory address stored in the next parameter, which happens to be the format string. Thus, assuming a 32-bit architecture, the AAAA in the format string itself would be treated as the pointer value 0x41414141, and the value at that address would be overwritten with the number 76. 35 | 36 | Doing this will usually cause a crash the next time the system has to access that memory location, but by using a string carefully crafted for a specific device and operating system, the attacker can write arbitrary data to any location. See the manual page for printf for a full description of format string syntax. 37 | 38 | Source: [Secure Coding Guide](https://developer.apple.com/library/mac/documentation/Security/Conceptual/SecureCodingGuide/Articles/ValidatingInput.html) 39 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | extern crate rustc_serialize; 2 | extern crate docopt; 3 | extern crate regex; 4 | 5 | use docopt::Docopt; 6 | use regex::Regex; 7 | 8 | use std::collections::HashMap; 9 | use std::ffi::OsString; 10 | use std::fs; 11 | use std::path::Path; 12 | use std::path::PathBuf; 13 | use std::process::Command; 14 | 15 | // Write the Docopt usage string. 16 | static USAGE: &'static str = " 17 | Usage: istringscheck 18 | Options: 19 | -h, --help Displays this message. 
20 | "; 21 | 22 | #[derive(Debug, RustcDecodable)] 23 | struct Args { 24 | arg_source: String, 25 | arg_translations: String, 26 | } 27 | 28 | 29 | // Ensures that all your translated strings have the same number of format parameters 30 | // as your source strings. 31 | fn main() { 32 | let args: Args = Docopt::new(USAGE) 33 | .and_then(|d| d.decode()) 34 | .unwrap_or_else(|e| e.exit()); 35 | validate_args(&args); 36 | 37 | // Input Strings 38 | let source_filename = &args.arg_source; 39 | let source_strings = hashmap_from_source(path_from_string(source_filename), "utf-8"); 40 | 41 | // Strings to compare to 42 | let mut language_files = language_files_from_dir(&args.arg_translations, source_filename); 43 | 44 | // Don't compare the source strings with itself. 45 | language_files.iter().position(|file| file.to_str().eq(&path_from_string(source_filename).to_str()) ).map(|e| language_files.remove(e)); 46 | 47 | for language_file in language_files { 48 | println!( "Parsing language file {}", language_file.display()); 49 | 50 | let translated_strings = hashmap_from_source(language_file, "utf-8"); 51 | 52 | compare_strings(&source_strings, &translated_strings); 53 | 54 | } 55 | } 56 | 57 | fn compare_strings(source_strings :&HashMap, translated_strings: &HashMap) { 58 | for (key, value) in source_strings { 59 | let translated_value = translated_strings.get(key).unwrap_or_else(|| panic!("Language file is missing strings for key {}", key)); 60 | 61 | if translated_value != value { 62 | println!("This attributed string: {} doesn't have the correct amount of occurences of the format argument", key); 63 | } 64 | } 65 | } 66 | 67 | fn language_files_from_dir(dir_string: &String, source_string: &String) -> Vec { 68 | let mut paths = fs::read_dir(&Path::new(dir_string)).unwrap(); 69 | let mut string_files :Vec = vec![]; 70 | let source_filename = Path::new(source_string).file_name().unwrap_or(&OsString::new()).to_os_string().into_string().unwrap(); 71 | 72 | loop { 73 | match 
paths.next() { 74 | Some(x) => { 75 | let path = x.unwrap().path(); 76 | let path_string: String = path.to_str().unwrap().to_string(); 77 | 78 | if is_folder(path.clone()){ 79 | for file in &language_files_from_dir(&path_string, source_string) { 80 | string_files.push(file.clone()); 81 | } 82 | } else { 83 | let file_name = path.file_name().unwrap_or(&OsString::new()).to_os_string().into_string().unwrap(); 84 | if file_name == source_filename { 85 | string_files.push(path); 86 | } 87 | } 88 | }, 89 | None => { break } 90 | } 91 | } 92 | 93 | return string_files; 94 | } 95 | 96 | fn hashmap_from_source(source_path: PathBuf, file_encoding: &str) -> HashMap { 97 | let mut source_strings: HashMap = HashMap::new(); 98 | // Apple Strings files are UTF-16 encoded, processing them in UTF-8 99 | let utf8_conversion_out = Command::new("iconv") 100 | .arg("-f").arg(file_encoding) 101 | .arg("-t").arg("utf-8") 102 | .arg(&source_path).output() 103 | .unwrap_or_else(|e| { panic!("failed to execute process: {}", e) }); 104 | 105 | let strings_file = String::from_utf8(utf8_conversion_out.stdout).unwrap(); 106 | let mut lines = strings_file.lines(); 107 | 108 | let string_key_re = match Regex::new(r#"^"(.*)" = "(.*)";$"#) { 109 | Ok(re) => re, 110 | Err(err) => panic!("{}", err), 111 | }; 112 | let format_strings_re = match Regex::new(r#"%"#) { 113 | Ok(re) => re, 114 | Err(err) => panic!("{}", err), 115 | }; 116 | 117 | loop { 118 | match lines.next() { 119 | Some(line) => { 120 | for cap in string_key_re.captures_iter(&line) { 121 | let n_formats = format_strings_re.captures_iter(&cap.get(2).unwrap().as_str()).count(); 122 | source_strings.insert(cap.get(1).unwrap().as_str().to_string(), n_formats); 123 | } 124 | }, 125 | None => { break } 126 | } 127 | } 128 | 129 | return source_strings; 130 | } 131 | 132 | fn path_from_string(path_string: &String) -> PathBuf { 133 | return Path::new(path_string).to_path_buf(); 134 | } 135 | 136 | fn is_file(path :PathBuf) -> bool { 137 | 
return fs::metadata(path).map(|m| m.is_file()).unwrap_or(false); 138 | } 139 | 140 | fn is_folder(path :PathBuf) -> bool { 141 | return fs::metadata(path).map(|m| m.is_dir()).unwrap_or(false); 142 | } 143 | 144 | // Validates the arguments, checking file and folder exists. 145 | fn validate_args(args: &Args) { 146 | let source_path = path_from_string(&args.arg_source); 147 | let dir_path = path_from_string(&args.arg_translations); 148 | 149 | let source_exists = is_file(source_path); 150 | let dir_exists = is_folder(dir_path); 151 | 152 | if !(source_exists && dir_exists) { 153 | panic!("The arguments passed must be the source localization files 154 | and the folder containing other localizations \n".to_string() + USAGE); 155 | } 156 | } 157 | --------------------------------------------------------------------------------