├── .gitignore ├── README.md ├── bi.h ├── bi_inspect.c └── samples ├── 01.bi ├── 02.bi └── 03.bi /.gitignore: -------------------------------------------------------------------------------- 1 | bi_inspect -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # bi format 2 | 3 | Simple Structured Human-Readable Binary Format. It is used by such projects as [rere.py](https://github.com/tsoding/rere.py) and [Porth](https://gitlab.com/tsoding/porth). 4 | 5 | The goals of the format: 6 | - So simple you don't need a library to Parse it or Serialize into it (we do provide a reference implementation of a parser in C though [bi.h](./bi.h)), 7 | - If the data stored in the [Blobs](#blob-field) is Text the whole file looks like Text making any changes committed in VCS have a nice Human-Readable Diff. 8 | 9 | ## Description 10 | 11 | A bi file consists of a sequence of fields. There are only 2 kinds of fields for now: [Integer](#integer-field) and [Blob](#blob-field). (We may add more kinds of fields in the future, but it's very unlikely, because these two feel pretty sufficient for now). Each field has a name associated with it. Names don't have to be unique. 12 | 13 | ### Integer field 14 | 15 | - An integer field starts with a sequence of bytes that if viewed as ASCII looks like `:i` followed by exactly ONE space, 16 | - After that comes a sequence of arbitrary bytes (excluding newline `\n`) which denotes the name of the field. It is usually recommended to use ASCII Human-Readable names, but you do you. You can even store arbitrary data in the name of a field. The name ends with exactly ONE space, 17 | - After that comes a sequence of ASCII digits denoting the integer stored in the field. There is no upper bound for integers. Handle overflows however you want. 
(We don't support negative integers right now, but we may in the future if needed), 18 | - Field ends with ASCII newline `\n`. 19 | 20 | Here is what a bunch of integer fields would look like in a bi file: 21 | 22 | ``` 23 | :i foo 69 24 | :i bar 420 25 | :i baz 1337 26 | ``` 27 | 28 | ### Blob field 29 | 30 | - A blob field starts with a sequence of bytes that if viewed as ASCII looks like `:b` followed by exactly ONE space, 31 | - After that comes a sequence of arbitrary bytes (excluding newline `\n`) which denotes the name of the field. The name ends with exactly ONE space, 32 | - After that comes a sequence of ASCII digits denoting the size of the Blob in bytes. There is no upper bound for integers. Handle overflows however you want, 33 | - After that comes exactly ONE newline `\n`, 34 | - After that come the bytes of the Blob, 35 | - After that comes exactly ONE newline `\n`. 36 | 37 | Here is what a bi file with a bunch of blob fields and an integer field would look like: 38 | 39 | ``` 40 | :i count 3 41 | :b hello 12 42 | Hello, World 43 | :b foo 7 44 | Foo bar 45 | :b test 163 46 | Test test test 47 | 48 | You can have new lines in here. 49 | You can actually store binary data in here. 50 | You can nest another bi file in here, thus 51 | making the format Tree-like. 52 | ``` 53 | 54 | ## Be creative 55 | 56 | It is really encouraged to get creative with this format. Who said you can't store other formats in the Blobs? 
// bi.h - reference parser for the bi format (Simple Structured
// Human-Readable Binary Format).
//
// A bi file is a sequence of named fields. Two kinds of fields exist:
//
//   :i <name> <digits>\n                 integer field
//   :b <name> <size>\n<size bytes>\n     blob field
//
// Blobs may carry any payload, including other formats or a nested bi file:
//
//   :b Person/json 69
//   {
//       "Name": "John Doe",
//       "Age": 69,
//       "Occupation": "Webdev"
//   }
//
// Usage: #define BI_IMPLEMENTATION before including this header in the one
// translation unit that should contain the function definitions.
#ifndef BI_H_
#define BI_H_

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

typedef enum {
    BI_BLOB = 'b',
    BI_INT  = 'i',
} Bi_Type;

// Parser state over a caller-owned byte buffer. The parser never copies or
// frees the buffer; `field.name` and `field.blob_start` point into it.
typedef struct {
    const char *start;  // first byte of the input
    size_t count;       // number of bytes in the input
    size_t offset;      // read cursor, in bytes from `start`

    // Result of the most recent bi_get_field() call.
    struct {
        size_t offset;     // offset at which the field starts
        char type;         // BI_BLOB or BI_INT, might be something else if the file is incorrect, always check for that
        const char *name;  // NOT NUL-terminated, use name_count
        size_t name_count;

        size_t integer;          // acts like size of the blob when type is BI_BLOB
        const char *blob_start;  // NULL unless type is BI_BLOB
    } field;
} Bi;

#define bi_is_empty(bi) ((bi).offset >= (bi).count)
#define bi_is_digit(x) ('0' <= (x) && (x) <= '9') // so we don't depend on ctype.h
#define bi_chop_byte(bi) ((bi)->start[(bi)->offset++])

// Create a new Bi parser out of the blob field fetched with bi_get_field().
// Useful to parse .bi recursively. Asserts that the current field is a blob.
Bi bi_of_blob(Bi bi);

// Parse the next field into bi->field. Returns true on success, returns
// false otherwise. After a failure the contents of bi->field (other than
// field.offset, which marks where the failed field started) and bi->offset
// are only partially updated and should not be relied upon.
bool bi_get_field(Bi *bi);

// Bi_Type as human readable name for logging
const char *bi_display_type(Bi_Type type);

#endif // BI_H_

#ifdef BI_IMPLEMENTATION

Bi bi_of_blob(Bi bi)
{
    assert(bi.field.type == BI_BLOB);
    return (Bi) {
        .start = bi.field.blob_start,
        .count = bi.field.integer,
    };
}

bool bi_get_field(Bi *bi)
{
    bi->field.offset = bi->offset;

    // Prefix: ':' <type byte> ' '. The type byte is stored unvalidated so the
    // caller can report unknown field kinds.
    if (bi_is_empty(*bi) || bi_chop_byte(bi) != ':') return false;

    if (bi_is_empty(*bi)) return false;
    bi->field.type = bi_chop_byte(bi);

    if (bi_is_empty(*bi) || bi_chop_byte(bi) != ' ') return false;

    // Name: runs up to the next space. A newline is not allowed inside a
    // name, so hitting one first means the field is malformed; stopping here
    // keeps a broken line from swallowing the start of the following field.
    bi->field.name = &bi->start[bi->offset];
    bi->field.name_count = 0;
    while (!bi_is_empty(*bi) &&
           bi->start[bi->offset] != ' ' &&
           bi->start[bi->offset] != '\n') {
        bi->field.name_count++;
        bi->offset++;
    }

    if (bi_is_empty(*bi) || bi_chop_byte(bi) != ' ') return false;

    // Value: decimal digits (an empty digit sequence yields 0). A value that
    // does not fit in size_t is rejected instead of silently wrapping.
    bi->field.integer = 0;
    while (!bi_is_empty(*bi) && bi_is_digit(bi->start[bi->offset])) {
        size_t digit = (size_t)(bi_chop_byte(bi) - '0');
        if (bi->field.integer > (SIZE_MAX - digit) / 10) return false;
        bi->field.integer = bi->field.integer * 10 + digit;
    }

    if (bi_is_empty(*bi) || bi_chop_byte(bi) != '\n') return false;

    bi->field.blob_start = NULL;
    if (bi->field.type == BI_BLOB) {
        // The payload plus its terminating newline must fit in what is left.
        // Compare against the remaining byte count *before* advancing the
        // cursor: adding an attacker-controlled size to the offset first can
        // wrap around size_t and make an out-of-bounds blob look valid.
        // (bi->offset <= bi->count holds here, so the subtraction is safe.)
        if (bi->field.integer >= bi->count - bi->offset) return false;

        bi->field.blob_start = &bi->start[bi->offset];
        bi->offset += bi->field.integer;
        if (bi_chop_byte(bi) != '\n') return false;
    }

    return true;
}

const char *bi_display_type(Bi_Type type)
{
    switch (type) {
    case BI_INT:  return "Integer";
    case BI_BLOB: return "Blob";
    default:      return "Unknown";
    }
}

#endif // BI_IMPLEMENTATION
-------------------------------------------------------------------------------- 1 | // bi.h usage example: 2 | // $ cc -o bi_inspect bi_inspect.c 3 | // $ ./bi_inspect ./samples/*.bi 4 | #include 5 | #include 6 | #include 7 | #include 8 | #define BI_IMPLEMENTATION 9 | #include "bi.h" 10 | 11 | #define shift(xs, xs_sz) (assert(xs_sz > 0), xs_sz--, *xs++) 12 | 13 | bool read_entire_file(const char *path, char **buffer, size_t *buffer_size) 14 | { 15 | bool result = true; 16 | 17 | FILE *f = fopen(path, "rb"); 18 | if (f == NULL) goto fail; 19 | if (fseek(f, 0, SEEK_END) < 0) goto fail; 20 | long m = ftell(f); 21 | if (m < 0) goto fail; 22 | if (fseek(f, 0, SEEK_SET) < 0) goto fail; 23 | 24 | *buffer_size = m; 25 | *buffer = malloc(*buffer_size); 26 | 27 | fread(*buffer, *buffer_size, 1, f); 28 | if (ferror(f)) { 29 | // TODO: Afaik, ferror does not set errno. So the error reporting in fail is not correct in this case. 30 | goto fail; 31 | } 32 | 33 | fclose(f); 34 | return true; 35 | fail: 36 | fprintf(stderr, "Could not read file %s: %s", path, strerror(errno)); 37 | if (f) fclose(f); 38 | return false; 39 | } 40 | 41 | int main(int argc, char **argv) 42 | { 43 | const char *program_name = shift(argv, argc); 44 | 45 | if (argc <= 0) { 46 | fprintf(stderr, "Usage: %s \n", program_name); 47 | fprintf(stderr, "ERROR: no input is provided\n"); 48 | return 1; 49 | } 50 | 51 | while (argc > 0) { 52 | char *buffer = 0; 53 | size_t buffer_size = 0; 54 | const char *file_path = shift(argv, argc); 55 | 56 | if (read_entire_file(file_path, &buffer, &buffer_size)) { 57 | Bi bi = {.start = buffer, .count = buffer_size}; 58 | 59 | while (!bi_is_empty(bi)) { 60 | if (!bi_get_field(&bi)) { 61 | fprintf(stderr, "%s[%zu]: Failed to parse a field. 
Dropping entire file...\n", file_path, bi.field.offset); 62 | break; 63 | } 64 | switch (bi.field.type) { 65 | case BI_BLOB: 66 | fprintf(stderr, "%s[%zu]: Blob `%.*s` with size %zu bytes\n", file_path, bi.field.offset, (int)bi.field.name_count, bi.field.name, bi.field.integer); 67 | break; 68 | case BI_INT: 69 | fprintf(stderr, "%s[%zu]: Integer `%.*s` with value %zu\n", file_path, bi.field.offset, (int)bi.field.name_count, bi.field.name, bi.field.integer); 70 | break; 71 | default: 72 | fprintf(stderr, "%s[%zu]: Unexpected field type at offset %zu", file_path, bi.field.offset); 73 | } 74 | } 75 | } 76 | 77 | free(buffer); 78 | } 79 | 80 | return 0; 81 | } 82 | -------------------------------------------------------------------------------- /samples/01.bi: -------------------------------------------------------------------------------- 1 | :i foo 69 2 | :i bar 420 3 | :i baz 1337 4 | -------------------------------------------------------------------------------- /samples/02.bi: -------------------------------------------------------------------------------- 1 | :i count 3 2 | :b hello 12 3 | Hello, World 4 | :b foo 7 5 | Foo bar 6 | :b test 163 7 | Test test test 8 | 9 | You can have new lines in here. 10 | You can actually store binary data in here. 11 | You can nest another bi file in here, thus 12 | making the format Tree-like. 13 | -------------------------------------------------------------------------------- /samples/03.bi: -------------------------------------------------------------------------------- 1 | :b Person/json 69 2 | { 3 | "Name": "John Doe", 4 | "Age": 69, 5 | "Occupation": "Webdev" 6 | } 7 | --------------------------------------------------------------------------------