├── Makefile ├── README.md ├── b64enc.c ├── base64.cry ├── base64encode.c ├── base64encode.h └── proof ├── Makefile ├── proof-template.cry ├── proof.scr └── sym_encode.c /Makefile: -------------------------------------------------------------------------------- 1 | CFLAGS = -O3 -Wall -Wextra -pedantic -std=c99 2 | 3 | b64enc: b64enc.c base64encode.c 4 | $(CC) $(CFLAGS) $(CPPFLAGS) $^ -o $@ 5 | 6 | clean: 7 | rm -f b64enc 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | This project provides the following: 2 | 3 | * `*.c` — a fast base64 encoding routine based on [libb64](http://libb64.sourceforge.net/). 4 | * `base64.cry` — a Cryptol specification of base64. 5 | * `proof/` — a proof that the C code is correct with respect to the Cryptol spec. 6 | 7 | You can read about the proof here: 8 | 9 | https://galois.com/blog/2013/09/high-assurance-base64/ 10 | 11 | # Usage 12 | 13 | To build the C code: 14 | 15 | $ make 16 | 17 | To run the C code: 18 | 19 | $ echo -n 'foobar' | ./b64enc - 20 | Zm9vYmFy 21 | 22 | # License 23 | 24 | C code: public domain, without any warranty. 25 | Everything else: MIT license. 26 | -------------------------------------------------------------------------------- /b64enc.c: -------------------------------------------------------------------------------- 1 | /* 2 | base64 encoder by David Lazar 3 | 4 | Based off of blakesum.c 5 | */ 6 | #include 7 | #include 8 | #include 9 | 10 | #include "base64encode.h" 11 | 12 | #define BUFSIZE 32 * 1024 13 | 14 | int main(int argc, char **argv) { 15 | uint8_t buf[BUFSIZE]; 16 | char enc[2 * BUFSIZE]; 17 | size_t c, i; 18 | char *file; 19 | FILE *fp; 20 | 21 | 22 | if (argc < 2) { 23 | printf("Usage: %s \n", argv[0]); 24 | return 1; 25 | } else { 26 | file = argv[1]; 27 | } 28 | 29 | if (strcmp(file, "-") == 0) { 30 | fp = stdin; 31 | freopen(NULL, "rb", stdin); 32 | } else if ((fp = fopen(file, "rb")) == NULL) { 33 | printf("cannot open %s\n", file); 34 | return 1; 35 | } 36 | 37 | base64_encodestate S; 38 | base64_encode_init(&S); 39 | 40 | while ((c = fread(buf, 1, BUFSIZE, fp)) > 0) { 41 | i = base64_encode_update(&S, buf, c, enc); 42 | fwrite(enc, 1, i, stdout); 43 | 44 | if (feof(fp)) 45 | break; 46 | } 47 | if (ferror(fp)) { 48 | printf("%s: read error\n", file); 49 | if (fp != stdin) 50 | fclose(fp); 51 | return 1; 52 | } 53 | if (fp != stdin) 54 | fclose(fp); 55 | 56 | i = base64_encode_final(&S, enc); 57 | fwrite(enc, 1, i, stdout); 58 | fprintf(stdout, "\n"); 59 | 60 | return 0; 61 | } 62 | -------------------------------------------------------------------------------- /base64.cry: -------------------------------------------------------------------------------- 1 | /* base64 specification 2 | * Author: David Lazar 3 | * 4 | * Based on RFC 4648: 5 | * https://tools.ietf.org/html/rfc4648 6 | */ 7 | 8 | 9 | //////////////////////////////////////////////////////////////////////// 10 | // base64 alphabet 11 | //////////////////////////////////////////////////////////////////////// 12 | 13 | 14 | alphabet : [64][8]; 15 | alphabet = ['A' .. 'Z'] # ['a' .. 'z'] # ['0' .. '9'] # ['+' '/']; 16 | 17 | alphabet' : [256][6]; 18 | alphabet' = (zero : [43][6]) # [62 0 0 0 63] # [52 .. 61] # (zero : [7][6]) # [0 .. 26] # (zero : [5][6]) # [26 .. 51] # zero; 19 | 20 | inAlphabet : [8] -> Bit; 21 | inAlphabet x = ((x >= 'A') & (x <= 'Z')) 22 | | ((x >= 'a') & (x <= 'z')) 23 | | ((x >= '0') & (x <= '9')) 24 | | (x == '+') | (x == '/'); 25 | 26 | theorem alphabetLeftInv : {x}. alphabet' @ (alphabet @ (x:[6])) == x; 27 | 28 | theorem alphabetRightInv : {x}. 29 | if inAlphabet x 30 | then alphabet @ (alphabet' @ x) == x 31 | else True; 32 | 33 | 34 | 35 | //////////////////////////////////////////////////////////////////////// 36 | // Core base64 encode and decode functions. These do not handle padding. 37 | //////////////////////////////////////////////////////////////////////// 38 | 39 | 40 | encode : {a c} (fin a, c == (8 * a + 5) / 6, 6 * c >= 8 * a) => [a][8] -> [c][8]; 41 | encode xs = [| alphabet @ x || x <- groupBy(6, join xs # zero) |]; 42 | 43 | encodeLE : {a c} (fin a, fin c, c == (8 * a + 5) / 6, 6 * c >= 8 * a) => [a][8] -> [c][8]; 44 | encodeLE xs = [| alphabet @ reverse x || x <- groupBy(6, join rxs # zero) |] 45 | where rxs = [| reverse x || x <- xs |]; 46 | 47 | decode : {a c} (fin a, a == (6 * c - 5) / 8 + 1, 6 * c >= 8 * a) => [c][8] -> [a][8]; 48 | decode xs = groupBy(8, take(`a * 8, join [| alphabet' @ x || x <- xs |])); 49 | 50 | decodeLE : {a c} (fin a, a == (6 * c - 5) / 8 + 1, 6 * c >= 8 * a) => [c][8] -> [a][8]; 51 | decodeLE xs = [| reverse b || b <- groupBy(8, take(`a * 8, join [| reverse (alphabet' @ x) || x <- xs |])) |]; 52 | 53 | // polymorphic theorems 54 | theorem encodeLeftInv: {x}. decode (encode x) == x; 55 | 56 | theorem encodeLELeftInv: {x}. decodeLE (encodeLE x) == x; 57 | 58 | // NOTE: encode is non-surjective, so it does not have a right inverse. 59 | // Similarly, decode is non-injective: 60 | // decode "//" -> [0xff] 61 | // decode "/w" -> [0xff] 62 | 63 | 64 | //////////////////////////////////////////////////////////////////////// 65 | // base64 encode and decode functions that handle padding 66 | //////////////////////////////////////////////////////////////////////// 67 | 68 | // NOTE: we use the LE functions since Cryptol is little endian by default. 69 | 70 | b64encode : {a r} 71 | ( fin a, fin r 72 | , r == 4 * ((a + 2) / 3) 73 | // inferred: 74 | , r >= (8 * a + 5) / 6 75 | , 6 * ((8 * a + 5) / 6) >= 8 * a 76 | ) => [a][8] -> [r][8]; 77 | b64encode xs = encodeLE xs # padding 78 | where { 79 | padding : {z} (fin z) => [z][8]; 80 | padding = take(`z, "=" # padding); 81 | }; 82 | 83 | 84 | // The length of the decoded message 'a' depends on the value of the 85 | // input string (in particular the number of '=' symbols padding the 86 | // input). Cryptol's type system is not powerful enough to express 87 | // this dependence, so either 'pad' or 'a' must be passed explicitly: 88 | // 89 | // b64decode `{a=5} "aGVsbG8=" --> "hello" 90 | // b64decode `{pad=1} "aGVsbG8=" --> "hello" 91 | // 92 | b64decode : {pad a r c} 93 | ( fin a, fin r, fin pad 94 | , a + pad == (6 * r - 5) / 8 + 1 95 | , pad >= 0, 2 >= pad 96 | // inferred: 97 | , 6 * r >= 8 * ((6 * r - 5) / 8) + 8 98 | ) => [r][8] -> [a][8]; 99 | b64decode xs = reverse (drop(`pad, reverse (decodeLE xs))); 100 | 101 | 102 | // polymorphic theorem 103 | b64encodeLeftInv : {a} 104 | ( fin a 105 | // inferred: 106 | , 2 >= (24*((a+2)/3)-5)/8-a+1 107 | , 6*((8*a+5)/6) >= 8*a 108 | , 4*((a+2)/3) >= (8*a+5)/6 109 | , 24*((a+2)/3) >= 8*((24*((a+2)/3)-5)/8)+8 110 | , (24*((a+2)/3)-5)/8-a+1 >= 0 111 | ) => [a][8] -> Bit; 112 | theorem b64encodeLeftInv: {x}. b64decode (b64encode x) == x; 113 | 114 | 115 | 116 | //////////////////////////////////////////////////////////////////////// 117 | // test cases 118 | //////////////////////////////////////////////////////////////////////// 119 | 120 | 121 | tests = [te1 te2 te3 te4 te5 te6 te7 td1 td2 td3 td4 td5 td6 td7]; 122 | 123 | theorem testsPass: tests == ~zero; 124 | 125 | // Test vectors from RFC 4648: https://tools.ietf.org/html/rfc4648#section-10 126 | te1 = b64encode "" == ""; 127 | te2 = b64encode "f" == "Zg=="; 128 | te3 = b64encode "fo" == "Zm8="; 129 | te4 = b64encode "foo" == "Zm9v"; 130 | te5 = b64encode "foob" == "Zm9vYg=="; 131 | te6 = b64encode "fooba" == "Zm9vYmE="; 132 | te7 = b64encode "foobar" == "Zm9vYmFy"; 133 | 134 | td1 = b64decode`{pad=0} "" == ""; 135 | td2 = b64decode`{pad=2} "Zg==" == "f"; 136 | td3 = b64decode`{pad=1} "Zm8=" == "fo"; 137 | td4 = b64decode`{pad=0} "Zm9v" == "foo"; 138 | td5 = b64decode`{pad=2} "Zm9vYg==" == "foob"; 139 | td6 = b64decode`{pad=1} "Zm9vYmE=" == "fooba"; 140 | td7 = b64decode`{pad=0} "Zm9vYmFy" == "foobar"; 141 | -------------------------------------------------------------------------------- /base64encode.c: -------------------------------------------------------------------------------- 1 | /* 2 | base64encode.c - modified by David Lazar 3 | 4 | Originally: 5 | cencoder.c - c source to a base64 encoding algorithm implementation 6 | 7 | This is part of the libb64 project, and has been placed in the public domain. 8 | For details, see http://sourceforge.net/projects/libb64 9 | */ 10 | 11 | #include 12 | #include 13 | #include "base64encode.h" 14 | 15 | void base64_encode_init(base64_encodestate *S) { 16 | S->step = step_A; 17 | S->result = 0; 18 | } 19 | 20 | char base64_encode_value(uint8_t value) { 21 | static const char* encoding = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; 22 | if (value > 63) return '='; 23 | return encoding[value]; 24 | } 25 | 26 | ptrdiff_t base64_encode_update(base64_encodestate *S, const uint8_t *data, uint64_t datalen, char *encoded) { 27 | char *encoded_begin = encoded; 28 | const uint8_t *currbyte = data; 29 | const uint8_t *data_end = data + datalen; 30 | uint8_t result; 31 | uint8_t fragment; 32 | 33 | result = S->result; 34 | 35 | switch (S->step) { 36 | while (1) { 37 | case step_A: 38 | if (currbyte == data_end) { 39 | S->result = result; 40 | S->step = step_A; 41 | return encoded - encoded_begin; 42 | } 43 | fragment = *currbyte++; 44 | result = (fragment & 0x0fc) >> 2; 45 | *encoded++ = base64_encode_value(result); 46 | result = (fragment & 0x003) << 4; 47 | case step_B: 48 | if (currbyte == data_end) { 49 | S->result = result; 50 | S->step = step_B; 51 | return encoded - encoded_begin; 52 | } 53 | fragment = *currbyte++; 54 | result |= (fragment & 0x0f0) >> 4; 55 | *encoded++ = base64_encode_value(result); 56 | result = (fragment & 0x00f) << 2; 57 | case step_C: 58 | if (currbyte == data_end) { 59 | S->result = result; 60 | S->step = step_C; 61 | return encoded - encoded_begin; 62 | } 63 | fragment = *currbyte++; 64 | result |= (fragment & 0x0c0) >> 6; 65 | *encoded++ = base64_encode_value(result); 66 | result = (fragment & 0x03f) >> 0; 67 | *encoded++ = base64_encode_value(result); 68 | } 69 | } 70 | 71 | // control flow shouldn't reach here 72 | return encoded - encoded_begin; 73 | } 74 | 75 | ptrdiff_t base64_encode_final(base64_encodestate *S, char *encoded) { 76 | char *encoded_begin = encoded; 77 | 78 | switch (S->step) { 79 | case step_B: 80 | *encoded++ = base64_encode_value(S->result); 81 | *encoded++ = '='; 82 | *encoded++ = '='; 83 | break; 84 | case step_C: 85 | *encoded++ = base64_encode_value(S->result); 86 | *encoded++ = '='; 87 | break; 88 | case step_A: 89 | break; 90 | } 91 | 92 | return encoded - encoded_begin; 93 | } 94 | 95 | ptrdiff_t base64_encode(const uint8_t *data, uint64_t datalen, char *encoded) { 96 | ptrdiff_t c = 0; 97 | 98 | base64_encodestate S; 99 | base64_encode_init(&S); 100 | c += base64_encode_update(&S, data, datalen, encoded); 101 | c += base64_encode_final(&S, encoded + c); 102 | 103 | return c; 104 | } 105 | -------------------------------------------------------------------------------- /base64encode.h: -------------------------------------------------------------------------------- 1 | /* 2 | base64encode.h - modified by David Lazar 3 | 4 | Originally: 5 | cencode.h - c header for a base64 encoding algorithm 6 | 7 | This is part of the libb64 project, and has been placed in the public domain. 8 | For details, see http://sourceforge.net/projects/libb64 9 | */ 10 | 11 | #ifndef BASE64_ENCODE_H 12 | #define BASE64_ENCODE_H 13 | 14 | #include 15 | #include 16 | 17 | typedef enum { 18 | step_A, step_B, step_C 19 | } base64_encodestep; 20 | 21 | typedef struct { 22 | base64_encodestep step; 23 | uint8_t result; 24 | } base64_encodestate; 25 | 26 | void base64_encode_init(base64_encodestate *); 27 | char base64_encode_value(uint8_t); 28 | ptrdiff_t base64_encode_update(base64_encodestate *, const uint8_t *, uint64_t, char *); 29 | ptrdiff_t base64_encode_final(base64_encodestate *, char *); 30 | ptrdiff_t base64_encode(const uint8_t *, uint64_t, char *); 31 | 32 | #endif /* BASE64_ENCODE_H */ 33 | -------------------------------------------------------------------------------- /proof/Makefile: -------------------------------------------------------------------------------- 1 | # CUSTOMIZE 2 | SYMAPI ?= $(HOME)/src/Verifier/LLVM/sym-api 3 | 4 | MAKEFLAGS = --quiet 5 | 6 | CPPFLAGS := -DINLEN=$(n) -DAIGFILE=encode.aig 7 | 8 | prove: ../base64.cry encode.aig proof.cry 9 | cryptol -q -n proof.cry -b proof.scr | grep -E -v "Loading extern aig|^$$" 10 | 11 | check: ../base64.cry encode.aig proof.cry 12 | cryptol -n proof.cry -c ":check MatchesRef" 13 | 14 | %.bc: ../%.c 15 | clang -emit-llvm -I$(SYMAPI) -c $< -o $@ 16 | 17 | sym_encode.bc: sym_encode.c nvalue 18 | clang -emit-llvm -I.. -I$(SYMAPI) $(CPPFLAGS) -c $< -o $@ 19 | 20 | encode.bc: base64encode.bc sym_encode.bc 21 | llvm-link $^ -o $@ 22 | 23 | encode.aig: encode.bc 24 | lss $< > /dev/null 25 | 26 | proof.cry: proof-template.cry nvalue 27 | cpp -P $(CPPFLAGS) $< > $@ 28 | 29 | nvalue: 30 | ifndef n 31 | $(error Please specify a value for n. For example: make n=16) 32 | endif 33 | 34 | clean: 35 | rm -f *.bc *.aig proof.cry 36 | 37 | .PHONY: prove check nvalue clean 38 | -------------------------------------------------------------------------------- /proof/proof-template.cry: -------------------------------------------------------------------------------- 1 | include "../base64.cry"; 2 | 3 | #define STR(x) #x 4 | #define XSTR(x) STR(x) 5 | #define OUTLEN 4 * ((INLEN + 2) / 3) 6 | 7 | extern AIG encode_aig(XSTR(AIGFILE)) : [INLEN][8] -> [OUTLEN][8]; 8 | 9 | theorem MatchesRef : {x}. encode_aig x == b64encode x; 10 | -------------------------------------------------------------------------------- /proof/proof.scr: -------------------------------------------------------------------------------- 1 | :set symbolic 2 | :print "\nProving function equivalent to reference:" 3 | :type encode_aig 4 | :prove MatchesRef 5 | -------------------------------------------------------------------------------- /proof/sym_encode.c: -------------------------------------------------------------------------------- 1 | #include "base64encode.h" 2 | #include 3 | 4 | #define STR(x) #x 5 | #define XSTR(x) STR(x) 6 | #define OUTLEN 4 * ((INLEN + 2) / 3) 7 | 8 | int main() { 9 | uint8_t *data = lss_fresh_array_uint8(INLEN, 0, NULL); 10 | char result[OUTLEN]; 11 | base64_encode(data, INLEN, result); 12 | lss_write_aiger_array_uint8((uint8_t *) result, OUTLEN, XSTR(AIGFILE)); 13 | 14 | return 0; 15 | } 16 | --------------------------------------------------------------------------------