├── Makefile ├── README.md └── utofu.c /Makefile: -------------------------------------------------------------------------------- 1 | CFLAGS = -std=c99 -Wall -Wextra -Wpedantic -Wshadow `pkg-config --cflags icu-uc icu-io sqlite3` 2 | LDFLAGS = `pkg-config --libs icu-uc icu-io sqlite3` 3 | 4 | all: utofu 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ### Unicode TOFU 2 | 3 | Check unicode strings to detect changes over time that might be spoofing. 4 | 5 | ```bash 6 | # save string to history 7 | echo "hi" | ./utofu my-history 8 | 9 | # attempting to save this one causes a problem 10 | echo "hı" | ./utofu my-history 11 | FAILURE: string is confusable with previous value 12 | Previous: hi 13 | Current : hı 14 | ``` 15 | 16 | The concept is similar to how you trust SSH key fingerprints the first time they are used. If the fingerprint ever changes, SSH fails. With utofu you trust strings and save them in a single-file database. Attempting to save a new string which is confusable with one already in the database causes an error. 17 | 18 | #### How does it work? 19 | 20 | The program relies on libicu's Unicode security and [spoofing detection](http://icu-project.org/apiref/icu4c/uspoof_8h.html). For each line from standard input, the program executes these steps: 21 | 22 | 1. Read line as UTF-8 23 | 1. Convert to [Normalization Form C](http://unicode.org/reports/tr15/#Norm_Forms) for consistency 24 | 1. Calculate skeleton string (confusable strings have the same skeleton) 25 | 1. Insert UTF-8 version of normalized input and its skeleton into the database if the skeleton doesn't already exist 26 | 1. Compare the normalized input string with the string in the database with corresponding skeleton. If not an exact match die with an error. 27 | -------------------------------------------------------------------------------- /utofu.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | 11 | #define BUFSZ 2048 12 | 13 | sqlite3 *db = NULL; 14 | 15 | void db_close(void) 16 | { 17 | sqlite3_close(db); 18 | sqlite3_shutdown(); 19 | } 20 | 21 | int is_newline(UChar c) 22 | { 23 | /* ascii newline or U+2028 LINE SEPARATOR */ 24 | return c == '\n' || c == 0x2028; 25 | } 26 | 27 | sqlite3_stmt *prepare_stmt(const char *sql) 28 | { 29 | int err; 30 | sqlite3_stmt *ret; 31 | 32 | err = sqlite3_prepare_v2(db, sql, -1, &ret, NULL); 33 | if (err != SQLITE_OK) 34 | { 35 | fprintf(stderr, 36 | "Unable to prepare query: %s\n" 37 | "Query is: %s\n", 38 | sqlite3_errstr(err), sql); 39 | exit(EXIT_FAILURE); 40 | } 41 | return ret; 42 | } 43 | 44 | int main(int argc, char **argv) 45 | { 46 | UErrorCode status = U_ZERO_ERROR; 47 | UNormalizer2 *norm; 48 | USpoofChecker *spoof; 49 | UChar line[BUFSZ], normalized[BUFSZ]; 50 | char utf8_norm[BUFSZ*2], utf8_skel[BUFSZ*2]; 51 | const unsigned char* expected; 52 | sqlite3_stmt *insert_stmt, *lookup_stmt; 53 | UFILE *in; 54 | int32_t length; 55 | int err; 56 | 57 | if (argc != 2) 58 | { 59 | fprintf(stderr, "Usage: %s history-file\n", argv[0]); 60 | return EXIT_FAILURE; 61 | } 62 | 63 | if ((err = sqlite3_initialize()) != SQLITE_OK) 64 | { 65 | fprintf(stderr, "Unable to initialize sqlite3: %s\n", sqlite3_errstr(err)); 66 | return EXIT_FAILURE; 67 | } 68 | atexit(db_close); 69 | 70 | if ((err = sqlite3_open(argv[1], &db)) != SQLITE_OK) 71 | { 72 | fprintf(stderr, "Unable to open db \"%s\": %s\n", argv[1], sqlite3_errstr(err)); 73 | return EXIT_FAILURE; 74 | } 75 | 76 | /* if this fails because table already exists, no prob */ 77 | sqlite3_exec(db, 78 | "CREATE TABLE corpus (" 79 | " word TEXT NOT NULL," 80 | " skel TEXT PRIMARY KEY" 81 | ");", 82 | NULL, NULL, NULL); 83 | 84 | if (!(in = u_finit(stdin, NULL, NULL))) 85 | { 86 | fputs("Error opening stdin as UFILE\n", stderr); 87 | return EXIT_FAILURE; 88 | } 89 | 90 | norm = (UNormalizer2 *)unorm2_getNFCInstance(&status); 91 | if (U_FAILURE(status)) { 92 | fprintf(stderr, 93 | "unorm2_getNFCInstance(): %s\n", 94 | u_errorName(status)); 95 | return EXIT_FAILURE; 96 | } 97 | 98 | spoof = uspoof_open(&status); 99 | if (U_FAILURE(status)) { 100 | fprintf(stderr, "uspoof_open(): %s\n", u_errorName(status)); 101 | return EXIT_FAILURE; 102 | } 103 | 104 | lookup_stmt = prepare_stmt( 105 | "SELECT word FROM corpus WHERE skel = ?;"); 106 | insert_stmt = prepare_stmt( 107 | "INSERT OR IGNORE INTO corpus(word,skel) VALUES(?,?);"); 108 | 109 | while (U_SUCCESS(status) && u_fgets(line, BUFSZ, in)) 110 | { 111 | if (is_newline(line[0]) || line[0] == '\0') 112 | continue; 113 | unorm2_normalize(norm, line, -1, 114 | normalized, BUFSZ, &status); 115 | u_strToUTF8(utf8_norm, BUFSZ*2, &length, 116 | normalized, -1, &status); 117 | /* cheap chomp, already have the length */ 118 | if (is_newline(utf8_norm[length-1])) 119 | utf8_norm[length-1] = '\0'; 120 | uspoof_getSkeletonUTF8(spoof, 0, utf8_norm, -1, 121 | utf8_skel, BUFSZ*2, &status); 122 | 123 | sqlite3_bind_text(insert_stmt, 1, utf8_norm, -1, SQLITE_STATIC); 124 | sqlite3_bind_text(insert_stmt, 2, utf8_skel, -1, SQLITE_STATIC); 125 | 126 | sqlite3_bind_text(lookup_stmt, 1, utf8_skel, -1, SQLITE_STATIC); 127 | 128 | /* no RETURNING clause for INSERT, need a transaction, grr */ 129 | sqlite3_exec(db, "BEGIN;", NULL, NULL, NULL); 130 | sqlite3_step(insert_stmt); 131 | err = sqlite3_step(lookup_stmt); 132 | sqlite3_exec(db, "COMMIT;", NULL, NULL, NULL); 133 | 134 | if (err != SQLITE_ROW) 135 | { 136 | fprintf(stderr, 137 | "Could not find inserted value.\n" 138 | "Should not happen.\n"); 139 | sqlite3_reset(insert_stmt); 140 | sqlite3_reset(lookup_stmt); 141 | continue; 142 | } 143 | 144 | expected = sqlite3_column_text(lookup_stmt, 0); 145 | if (strcmp((const char *)expected, utf8_norm) != 0) 146 | { 147 | fprintf(stderr, 148 | "FAILURE: string is confusable with previous value\n" 149 | "Previous: %s\n" 150 | "Current : %s\n", 151 | expected, utf8_norm); 152 | 153 | sqlite3_finalize(insert_stmt); 154 | sqlite3_finalize(lookup_stmt); 155 | u_fclose(in); 156 | exit(EXIT_FAILURE); 157 | } 158 | 159 | sqlite3_reset(insert_stmt); 160 | sqlite3_reset(lookup_stmt); 161 | } 162 | sqlite3_finalize(insert_stmt); 163 | sqlite3_finalize(lookup_stmt); 164 | 165 | u_fclose(in); 166 | return U_FAILURE(status) ? EXIT_FAILURE : EXIT_SUCCESS; 167 | } 168 | --------------------------------------------------------------------------------