├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── macho_reader.c ├── macho_reader.h ├── macho_retriever.c ├── macho_retriever.h ├── macho_util.c ├── macho_util.h ├── main.c └── subject ├── main.c └── power2.c /.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 AlexDenisov 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CC=$(shell which clang) 2 | 3 | BUILD_DIR=build 4 | SUBJECT_DIR=subject 5 | 6 | SOURCE_FILES=main.c macho_retriever.c macho_reader.c macho_util.c 7 | 8 | RETRIEVER_BIN=$(BUILD_DIR)/bitcode_retriever 9 | 10 | all: $(BUILD_DIR) 11 | $(CC) $(SOURCE_FILES) -o $(RETRIEVER_BIN) -I/usr/include/libxml2 -lxar -lxml2 12 | 13 | ### Testing 14 | 15 | OBJECT_FILES=i386.o x86_64.o 16 | 17 | subject: $(BUILD_DIR) fat.o 18 | @true 19 | 20 | fat.o: $(OBJECT_FILES) 21 | cd $(BUILD_DIR) && lipo -create $^ -output $@ 22 | 23 | %.o: 24 | $(CC) -fembed-bitcode -c -arch $* $(SUBJECT_DIR)/main.c -o $(BUILD_DIR)/main.o 25 | $(CC) -fembed-bitcode -c -arch $* $(SUBJECT_DIR)/power2.c -o $(BUILD_DIR)/power2.o 26 | $(CC) -fembed-bitcode -arch $* $(BUILD_DIR)/main.o $(BUILD_DIR)/power2.o -o $(BUILD_DIR)/$@ 27 | 28 | $(BUILD_DIR): 29 | mkdir $@ 30 | 31 | clean: 32 | rm -rf $(BUILD_DIR) 33 | 34 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Bitcode Retriever 2 | 3 | Retrieves Bitcode from Mach-O binaries 4 | 5 | ### About 6 | 7 | [Bitcode](http://llvm.org/docs/BitCodeFormat.html) is stored as a [xar](https://en.wikipedia.org/wiki/Xar_(archiver)) archive inside a Mach-O binary. 
8 | 9 | This tool extracts the archive and puts it next to the binary, so it can be easily inspected using `xar` and `llvm-dis`. 10 | 11 | ### Build 12 | 13 | Clone the repo and run `make`; the built binary is placed in the `build` directory. 14 | 15 | ```bash 16 | $ git clone git@github.com:AlexDenisov/bitcode_retriever.git 17 | $ cd bitcode_retriever 18 | $ make 19 | ``` 20 | 21 | ### Usage 22 | 23 | _Note: currently it does not work with static libraries; there is an open issue #1. If you need this feature, please leave a comment there - it will bump the priority of this project on my personal todo list._ 24 | 25 | To use `bitcode_retriever`, simply feed it your binary and it will produce an archive with the bitcode. 26 | 27 | It accepts both fat and non-fat binaries. For fat binaries it produces a separate archive for each slice, e.g.: 28 | 29 | ```bash 30 | $ bitcode_retriever fat_app 31 | i386.xar 32 | x86_64.xar 33 | arm64.xar 34 | ``` 35 | 36 | For non-fat binaries it produces just one archive with the bitcode: 37 | 38 | ```bash 39 | $ bitcode_retriever non_fat_app 40 | i386.xar 41 | ``` 42 | 43 | To skip the xar archive and obtain the bitcode immediately, pass the `-e` argument. 
44 | 45 | ```bash 46 | $ bitcode_retriever -e fat_app 47 | i386.1.bc 48 | i386.2.bc 49 | x86_64.1.bc 50 | x86_64.2.bc 51 | ``` 52 | 53 | The project provides sample binaries that you can play with: 54 | 55 | ```bash 56 | $ make subject 57 | $ cd build 58 | $ ./bitcode_retriever i386.o 59 | # or 60 | $ ./bitcode_retriever fat.o 61 | ``` 62 | 63 | The xar archive stores a set of files with bitcode: 64 | 65 | ```bash 66 | $ xar -xf i386.o 67 | $ ls 68 | i386.o 1 2 69 | ``` 70 | 71 | You can dump LLVM IR from each file (`1`, `2`) using [`llvm-dis`](http://llvm.org/docs/CommandGuide/llvm-dis.html): 72 | 73 | ```bash 74 | $ llvm-dis 1 75 | $ llvm-dis 2 76 | $ ls 77 | 1 2 1.ll 2.ll 78 | ``` 79 | 80 | ### Bugs and issues 81 | 82 | If you have any problems or find a bug, feel free to open an issue and/or send a pull request. 83 | 84 | -------------------------------------------------------------------------------- /macho_reader.c: -------------------------------------------------------------------------------- 1 | #include "macho_reader.h" 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | const static int uint32_size = sizeof(uint32_t); 12 | 13 | const static int fat_header_size = sizeof(struct fat_header); 14 | const static int fat_arch_size = sizeof(struct fat_arch); 15 | 16 | const static int mach_header_size = sizeof(struct mach_header); 17 | const static int mach_header_64_size = sizeof(struct mach_header_64); 18 | 19 | const static int load_command_size = sizeof(struct load_command); 20 | 21 | const static int segment_command_size = sizeof(struct segment_command); 22 | const static int segment_command_64_size = sizeof(struct segment_command_64); 23 | 24 | struct _cpu_type_names { 25 | cpu_type_t cputype; 26 | const char *cpu_name; 27 | }; 28 | 29 | static struct _cpu_type_names cpu_type_names[] = { 30 | { CPU_TYPE_I386, "i386" }, 31 | { CPU_TYPE_X86_64, "x86_64" }, 32 | { CPU_TYPE_ARM, "arm" }, 33 | { 
CPU_TYPE_ARM64, "arm64" } 34 | }; 35 | 36 | int get_cpu_type_count() { 37 | return (int)(sizeof(cpu_type_names) / sizeof(cpu_type_names[0])); 38 | } 39 | 40 | static const char *cpu_type_name(cpu_type_t cpu_type) { 41 | static int cpu_type_names_size = sizeof(cpu_type_names) / sizeof(struct _cpu_type_names); 42 | for (int i = 0; i < cpu_type_names_size; i++ ) { 43 | if (cpu_type == cpu_type_names[i].cputype) { 44 | return cpu_type_names[i].cpu_name; 45 | } 46 | } 47 | 48 | return "unknown"; 49 | } 50 | 51 | const char *get_cpu_type_name(struct mach_header *header) { 52 | return cpu_type_name(header->cputype); 53 | } 54 | 55 | const char *get_cpu_type_name_64(struct mach_header_64 *header) { 56 | return cpu_type_name(header->cputype); 57 | } 58 | 59 | uint32_t get_magic(FILE *stream, int offset) { 60 | uint32_t magic = 0; 61 | fseek(stream, offset, SEEK_SET); 62 | fread(&magic, uint32_size, 1, stream); 63 | rewind(stream); 64 | return magic; 65 | } 66 | 67 | int is_magic_macho(const uint32_t magic) { 68 | return magic == MH_MAGIC_64 69 | || magic == MH_CIGAM_64 70 | || magic == MH_MAGIC 71 | || magic == MH_CIGAM 72 | || magic == FAT_MAGIC 73 | || magic == FAT_CIGAM; 74 | } 75 | 76 | int is_magic_64(const uint32_t magic) { 77 | return magic == MH_MAGIC_64 || magic == MH_CIGAM_64; 78 | } 79 | 80 | int is_should_swap_bytes(const uint32_t magic) { 81 | return magic == MH_CIGAM || magic == MH_CIGAM_64 || magic == FAT_CIGAM; 82 | } 83 | 84 | int is_fat(const uint32_t magic) { 85 | return magic == FAT_MAGIC || magic == FAT_CIGAM; 86 | } 87 | 88 | struct fat_header *load_fat_header(FILE *stream, const int swap_bytes) { 89 | struct fat_header *header = malloc(fat_header_size); 90 | fread(header, fat_header_size, 1, stream); 91 | rewind(stream); 92 | 93 | if (swap_bytes) { 94 | swap_fat_header(header, 0); 95 | } 96 | 97 | return header; 98 | } 99 | 100 | struct fat_arch *load_fat_arch(FILE *stream, const int offset, const int swap_bytes) { 101 | struct fat_arch *arch = 
malloc(fat_arch_size); 102 | fseek(stream, offset, SEEK_SET); 103 | fread(arch, fat_arch_size, 1, stream); 104 | rewind(stream); 105 | 106 | if (swap_bytes) { 107 | swap_fat_arch(arch, 1, 0); 108 | } 109 | 110 | return arch; 111 | } 112 | 113 | uint32_t offset_for_arch(FILE *stream, const int index, const int swap_bytes) { 114 | int offset = fat_header_size + fat_arch_size * index; 115 | struct fat_arch *arch = load_fat_arch(stream, offset, swap_bytes); 116 | uint32_t arch_offset = arch->offset; 117 | free(arch); 118 | return arch_offset; 119 | } 120 | 121 | struct mach_header *load_mach_header(FILE *stream, const int offset, const int swap_bytes) { 122 | struct mach_header *header = malloc(mach_header_size); 123 | fseek(stream, offset, SEEK_SET); 124 | fread(header, mach_header_size, 1, stream); 125 | rewind(stream); 126 | 127 | if (swap_bytes) { 128 | swap_mach_header(header, 0); 129 | } 130 | 131 | return header; 132 | } 133 | 134 | struct mach_header_64 *load_mach_header_64(FILE *stream, const int offset, const int swap_bytes) { 135 | struct mach_header_64 *header = malloc(mach_header_64_size); 136 | fseek(stream, offset, SEEK_SET); 137 | fread(header, mach_header_64_size, 1, stream); 138 | rewind(stream); 139 | 140 | if (swap_bytes) { 141 | swap_mach_header_64(header, 0); 142 | } 143 | 144 | return header; 145 | } 146 | 147 | struct load_command *load_load_command(FILE *stream, const int offset, const int swap_bytes) { 148 | struct load_command *command = malloc(load_command_size); 149 | fseek(stream, offset, SEEK_SET); 150 | fread(command, load_command_size, 1, stream); 151 | rewind(stream); 152 | 153 | if (swap_bytes) { 154 | swap_load_command(command, 0); 155 | } 156 | 157 | return command; 158 | } 159 | 160 | struct segment_command *load_segment_command(FILE *stream, const int offset, const int swap_bytes) { 161 | struct segment_command *command = malloc(segment_command_size); 162 | fseek(stream, offset, SEEK_SET); 163 | fread(command, 
segment_command_size, 1, stream); 164 | rewind(stream); 165 | 166 | if (swap_bytes) { 167 | swap_segment_command(command, 0); 168 | } 169 | 170 | return command; 171 | } 172 | 173 | struct segment_command_64 *load_segment_command_64(FILE *stream, const int offset, const int swap_bytes) { 174 | struct segment_command_64 *command = malloc(segment_command_64_size); 175 | fseek(stream, offset, SEEK_SET); 176 | fread(command, segment_command_64_size, 1, stream); 177 | rewind(stream); 178 | 179 | if (swap_bytes) { 180 | swap_segment_command_64(command, 0); 181 | } 182 | 183 | return command; 184 | } 185 | 186 | struct segment_command *load_llvm_segment_command(FILE *stream, struct mach_header *header, const int offset, const int swap_bytes) { 187 | int cmd_offset = offset + mach_header_size; 188 | for (int i = 0; i < header->ncmds; i++) { 189 | struct load_command *cmd = load_load_command(stream, cmd_offset, swap_bytes); 190 | if (cmd->cmd == LC_SEGMENT) { 191 | struct segment_command *segment = load_segment_command(stream, cmd_offset, swap_bytes); 192 | if (!strncmp("__LLVM", segment->segname, 7)) { 193 | return segment; 194 | } 195 | free(segment); 196 | } 197 | cmd_offset += cmd->cmdsize; 198 | free(cmd); 199 | } 200 | 201 | return 0; 202 | } 203 | 204 | struct segment_command_64 *load_llvm_segment_command_64(FILE *stream, struct mach_header_64 *header, const int offset, const int swap_bytes) { 205 | 206 | int cmd_offset = offset + mach_header_64_size; 207 | for (int i = 0; i < header->ncmds; i++) { 208 | struct load_command *cmd = load_load_command(stream, cmd_offset, swap_bytes); 209 | if (cmd->cmd == LC_SEGMENT_64) { 210 | struct segment_command_64 *segment = load_segment_command_64(stream, cmd_offset, swap_bytes); 211 | if (!strncmp("__LLVM", segment->segname, 7)) { 212 | return segment; 213 | } 214 | free(segment); 215 | } 216 | cmd_offset += cmd->cmdsize; 217 | free(cmd); 218 | } 219 | 220 | return 0; 221 | } 222 | 223 | 
-------------------------------------------------------------------------------- /macho_reader.h: -------------------------------------------------------------------------------- 1 | #ifndef MACHO_READER_H 2 | #define MACHO_READER_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include 9 | #include 10 | 11 | struct mach_header; 12 | struct mach_header_64; 13 | struct fat_header; 14 | 15 | int get_cpu_type_count(); 16 | const char *get_cpu_type_name(struct mach_header *header); 17 | const char *get_cpu_type_name_64(struct mach_header_64 *header); 18 | 19 | uint32_t get_magic(FILE *stream, const int offset); 20 | 21 | int is_magic_macho(const uint32_t magic); 22 | int is_magic_64(const uint32_t magic); 23 | int is_fat(const uint32_t magic); 24 | int is_should_swap_bytes(const uint32_t magic); 25 | 26 | struct fat_header *load_fat_header(FILE *stream, const int swap_bytes); 27 | uint32_t offset_for_arch(FILE *stream, const int index, const int swap_bytes); 28 | 29 | struct mach_header *load_mach_header(FILE *stream, const int offset, const int swap_bytes); 30 | struct mach_header_64 *load_mach_header_64(FILE *stream, const int offset, const int swap_bytes); 31 | 32 | struct segment_command *load_llvm_segment_command(FILE *stream, struct mach_header *header, const int offset, 33 | const int swap_bytes); 34 | struct segment_command_64 *load_llvm_segment_command_64(FILE *stream, struct mach_header_64 *header, const int offset, 35 | const int swap_bytes); 36 | 37 | #ifdef __cplusplus 38 | } 39 | #endif 40 | #endif 41 | -------------------------------------------------------------------------------- /macho_retriever.c: -------------------------------------------------------------------------------- 1 | #include "macho_reader.h" 2 | #include "macho_retriever.h" 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | int max_number_of_archives() { return get_cpu_type_count(); } 10 | 11 | struct bitcode_archive *make_bitcode(FILE *stream, const char *cpuname, 
const uint64_t offset, const uint64_t size) { 12 | struct bitcode_archive *bitcode = malloc(sizeof(struct bitcode_archive)); 13 | bitcode->size = size; 14 | 15 | bitcode->buffer = malloc(sizeof(char) * size); 16 | fseek(stream, offset, SEEK_SET); 17 | fread(bitcode->buffer, sizeof(char), size, stream); 18 | 19 | bitcode->cpu = cpuname; 20 | return bitcode; 21 | } 22 | 23 | struct bitcode_archive *extract_bitcode(FILE *stream, const int offset, const int swap_bytes) { 24 | struct mach_header *header = load_mach_header(stream, offset, swap_bytes); 25 | const char *cpu_name = get_cpu_type_name(header); 26 | struct segment_command *segment = load_llvm_segment_command(stream, header, offset, swap_bytes); 27 | if (!segment) { 28 | free(header); 29 | return NULL; 30 | } 31 | 32 | struct bitcode_archive *bitcode = make_bitcode(stream, cpu_name, offset + segment->fileoff, segment->filesize); 33 | free(segment); 34 | free(header); 35 | return bitcode; 36 | } 37 | 38 | struct bitcode_archive *extract_bitcode_64(FILE *stream, const int offset, const int swap_bytes) { 39 | struct mach_header_64 *header = load_mach_header_64(stream, offset, swap_bytes); 40 | const char *cpu_name = get_cpu_type_name_64(header); 41 | struct segment_command_64 *segment = load_llvm_segment_command_64(stream, header, offset, swap_bytes); 42 | if (!segment) { 43 | free(header); 44 | return NULL; 45 | } 46 | 47 | struct bitcode_archive *bitcode = make_bitcode(stream, cpu_name, offset + segment->fileoff, segment->filesize); 48 | free(segment); 49 | free(header); 50 | return bitcode; 51 | } 52 | 53 | struct bitcode_archive *retrieve_bitcode_from_nonfat(FILE *stream, const uint32_t offset) { 54 | uint32_t magic = get_magic(stream, offset); 55 | int is64 = is_magic_64(magic); 56 | int swap_bytes = is_should_swap_bytes(magic); 57 | 58 | if (is64) { 59 | return extract_bitcode_64(stream, offset, swap_bytes); 60 | } else { 61 | return extract_bitcode(stream, offset, swap_bytes); 62 | } 63 | } 64 | 65 | int 
is_macho(FILE *stream) { 66 | uint32_t magic = get_magic(stream, 0); 67 | return is_magic_macho(magic); 68 | } 69 | 70 | void retrieve_bitcode(FILE *stream, struct bitcode_archive *bitcodes[], int *count) { 71 | uint32_t magic = get_magic(stream, 0); 72 | if (is_fat(magic)) { 73 | int swap_bytes = is_should_swap_bytes(magic); 74 | struct fat_header *header = load_fat_header(stream, swap_bytes); 75 | *count = header->nfat_arch; 76 | for (int i = 0; i < *count; i++) { 77 | uint32_t offset = offset_for_arch(stream, i, swap_bytes); 78 | bitcodes[i] = retrieve_bitcode_from_nonfat(stream, offset); 79 | } 80 | free(header); 81 | } else { 82 | bitcodes[0] = retrieve_bitcode_from_nonfat(stream, 0); 83 | *count = 1; 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /macho_retriever.h: -------------------------------------------------------------------------------- 1 | #ifndef MACHO_RETRIEVER_H 2 | #define MACHO_RETRIEVER_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include 9 | #include 10 | 11 | struct bitcode_archive { 12 | uint64_t size; 13 | char* buffer; 14 | const char* cpu; 15 | }; 16 | 17 | struct bitcode_archive* make_bitcode(FILE* stream, const char* cpuname, const uint64_t offset, const uint64_t size); 18 | struct bitcode_archive* extract_bitcode(FILE* stream, const int offset, const int swap_bytes); 19 | struct bitcode_archive* extract_bitcode_64(FILE* stream, const int offset, const int swap_bytes); 20 | 21 | struct bitcode_archive* retrieve_bitcode_from_nonfat(FILE* stream, const uint32_t offset); 22 | 23 | int max_number_of_archives(); 24 | int is_macho(FILE* stream); 25 | 26 | void retrieve_bitcode(FILE* stream, struct bitcode_archive* bitcodes[], int* count); 27 | 28 | #ifdef __cplusplus 29 | } 30 | #endif 31 | #endif 32 | -------------------------------------------------------------------------------- /macho_util.c: 
-------------------------------------------------------------------------------- 1 | #include "macho_reader.h" 2 | #include "macho_retriever.h" 3 | #include "macho_util.h" 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | 12 | char *fname(const char *name, const char *ext) { 13 | const char *delimiter = "."; 14 | int length = strlen(name) + strlen(delimiter) + strlen(ext) + 1; 15 | char *filename = calloc(sizeof(char), length); 16 | strcpy(filename, name); 17 | strcat(filename, delimiter); 18 | strcat(filename, ext); 19 | return filename; 20 | } 21 | 22 | char *write_to_xar(struct bitcode_archive *bitcode) { 23 | char *filename = fname(bitcode->cpu, "xar"); 24 | FILE *output = fopen(filename, "wb"); 25 | 26 | if (!output) { 27 | fprintf(stderr, "Cannot open '%s' for writing\n", filename); 28 | free(filename); 29 | return NULL; 30 | } 31 | 32 | fwrite(bitcode->buffer, sizeof(char), bitcode->size, output); 33 | fclose(output); 34 | 35 | return filename; 36 | } 37 | 38 | int extract_xar(const char *path, const char *cpu, char *files[], int *count) { 39 | xar_t x; 40 | xar_iter_t xi; 41 | xar_file_t xf; 42 | xar_stream xs; 43 | char buffer[8192]; 44 | 45 | x = xar_open(path, READ); 46 | if (!x) { 47 | fprintf(stderr, "Error opening archive\n"); 48 | return 1; 49 | } 50 | 51 | xi = xar_iter_new(); 52 | if (!xi) { 53 | fprintf(stderr, "Error creating xar iterator\n"); 54 | return 2; 55 | } 56 | 57 | *count = 0; 58 | for (xf = xar_file_first(x, xi); xf; xf = xar_file_next(xi)) { 59 | char *path = xar_get_path(xf); 60 | const char *type; 61 | xar_prop_get(xf, "type", &type); 62 | if (!type) { 63 | fprintf(stderr, "File has no type %s\n", path); 64 | free(path); 65 | continue; 66 | } 67 | if (strcmp(type, "file") != 0) { 68 | fprintf(stderr, "Skipping %s\n", path); 69 | free(path); 70 | continue; 71 | } 72 | 73 | if (xar_extract_tostream_init(x, xf, &xs) != XAR_STREAM_OK) { 74 | fprintf(stderr, "Error initializing stream %s\n", path); 75 | 
free(path); 76 | continue; 77 | } 78 | 79 | char *prefix = fname(cpu, path); 80 | char *output_path = fname(prefix, "bc"); 81 | free(path); 82 | free(prefix); 83 | 84 | FILE *output = fopen(output_path, "wb"); 85 | if (!output) { 86 | fprintf(stderr, "Error opening output file %s\n", output_path); 87 | free(output_path); 88 | continue; 89 | } 90 | 91 | xs.avail_out = sizeof(buffer); 92 | xs.next_out = buffer; 93 | 94 | int32_t ret; 95 | while ((ret = xar_extract_tostream(&xs)) != XAR_STREAM_END) { 96 | if (ret == XAR_STREAM_ERR) { 97 | fprintf(stderr, "Error extracting stream %s\n", output_path); 98 | free(output_path); 99 | return 3; 100 | } 101 | 102 | fwrite(buffer, sizeof(char), sizeof(buffer) - xs.avail_out, output); 103 | 104 | xs.avail_out = sizeof(buffer); 105 | xs.next_out = buffer; 106 | } 107 | 108 | if (xar_extract_tostream_end(&xs) != XAR_STREAM_OK) { 109 | fprintf(stderr, "Error ending stream %s\n", output_path); 110 | } 111 | 112 | fclose(output); 113 | files[(*count)++] = output_path; 114 | } 115 | return 0; 116 | } 117 | 118 | int write_to_bitcode(struct bitcode_archive *bitcode, char *files[], int *count) { 119 | char *xar_file = write_to_xar(bitcode); 120 | int extracted = extract_xar(xar_file, bitcode->cpu, files, count); 121 | if (extracted != 0) { 122 | fprintf(stderr, "Error extracting xar file %s\n", xar_file); 123 | free(xar_file); 124 | return 1; 125 | } 126 | 127 | int removed = remove(xar_file); 128 | if (removed != 0) { 129 | fprintf(stderr, "Error removing xar file %s\n", xar_file); 130 | free(xar_file); 131 | return 2; 132 | } 133 | 134 | free(xar_file); 135 | return 0; 136 | } 137 | 138 | int get_options(xmlNode *option_parent, char *options[], int *size) { 139 | xmlNode *cur_node = NULL; 140 | *size = 0; 141 | for (cur_node = option_parent; cur_node; cur_node = cur_node->next) { 142 | if (cur_node->type == XML_ELEMENT_NODE && strcmp((const char *)cur_node->name, "option") == 0) { 143 | char *content = (char 
*)xmlNodeGetContent(cur_node); 144 | options[(*size)++] = content; 145 | } 146 | } 147 | return 0; 148 | } 149 | 150 | int get_linker_options(xmlNode *a_node, char *options[], int *size) { 151 | xmlNode *cur_node = NULL; 152 | for (cur_node = a_node; cur_node; cur_node = cur_node->next) { 153 | if (cur_node->type == XML_ELEMENT_NODE) { 154 | if (strcmp((const char *)cur_node->name, "link-options") == 0) { 155 | return get_options(cur_node->children, options, size); 156 | } else { 157 | get_linker_options(cur_node->children, options, size); 158 | } 159 | } 160 | } 161 | return 1; 162 | } 163 | 164 | int retrieve_toc(const char *xar_path, const char *toc_path) { 165 | xar_t x; 166 | x = xar_open(xar_path, READ); 167 | if (!x) { 168 | fprintf(stderr, "Error opening xar archive %s\n", xar_path); 169 | return 1; 170 | } 171 | xar_serialize(x, toc_path); 172 | return 0; 173 | } 174 | 175 | int retrieve_linker_options(const char *xar_path, char *options[], int *size) { 176 | const char *toc_file = "toc.temp"; 177 | retrieve_toc(xar_path, toc_file); 178 | 179 | xmlDoc *doc = NULL; 180 | doc = xmlReadFile(toc_file, NULL, 0); 181 | if (doc == NULL) { 182 | fprintf(stderr, "Cannot parse TOC %s\n", toc_file); 183 | remove(toc_file); 184 | return 1; 185 | } 186 | 187 | get_linker_options(xmlDocGetRootElement(doc), options, size); 188 | 189 | xmlFreeDoc(doc); 190 | xmlCleanupParser(); 191 | 192 | remove(toc_file); 193 | 194 | return 0; 195 | } 196 | -------------------------------------------------------------------------------- /macho_util.h: -------------------------------------------------------------------------------- 1 | #ifndef MACHO_UTIL_H 2 | #define MACHO_UTIL_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include 9 | #include "macho_retriever.h" 10 | 11 | char *fname(const char *name, const char *ext); 12 | char *write_to_xar(struct bitcode_archive *bitcode); 13 | 14 | int extract_xar(const char *path, const char *cpu, char *files[], int *count); 15 
| int write_to_bitcode(struct bitcode_archive *bitcode, char *files[], int *count); 16 | 17 | int get_options(xmlNode *option_parent, char *options[], int *size); 18 | int get_linker_options(xmlNode *a_node, char *options[], int *size); 19 | int retrieve_toc(const char *xar_path, const char *toc_path); 20 | int retrieve_linker_options(const char *xar_path, char *options[], int *size); 21 | 22 | #ifdef __cplusplus 23 | } 24 | #endif 25 | #endif 26 | -------------------------------------------------------------------------------- /main.c: -------------------------------------------------------------------------------- 1 | #include "macho_retriever.h" 2 | #include "macho_util.h" 3 | 4 | #include 5 | #include 6 | 7 | int main(int argc, char *argv[]) { 8 | int extract = 0; 9 | int linker_options = 0; 10 | 11 | int c; 12 | while ((c = getopt(argc, argv, "el")) != -1) { 13 | switch (c) { 14 | case 'e': 15 | extract = 1; 16 | break; 17 | case 'l': 18 | linker_options = 1; 19 | break; 20 | default: 21 | fprintf(stderr, "Unknown option `-%c'.\n", optopt); 22 | exit(1); 23 | } 24 | } 25 | 26 | if(optind >= argc) { 27 | fprintf(stderr, "No file provided.\n"); 28 | } 29 | 30 | char *filename = argv[optind]; 31 | 32 | FILE *stream = fopen(filename, "rb"); 33 | 34 | struct bitcode_archive *archives[max_number_of_archives()]; 35 | int archive_count; 36 | retrieve_bitcode(stream, archives, &archive_count); 37 | 38 | for (int i = 0; i < archive_count; i++) { 39 | if (archives[i]) { 40 | 41 | if (extract) { 42 | char *bitcode_files[1024]; 43 | int bitcode_count; 44 | write_to_bitcode(archives[i], bitcode_files, &bitcode_count); 45 | for (int j = 0; j < bitcode_count; j++) { 46 | printf("%s\n", bitcode_files[j]); 47 | free(bitcode_files[j]); 48 | } 49 | } 50 | 51 | if(!extract || linker_options) { 52 | char *xar_name = write_to_xar(archives[i]); 53 | printf("%s\n", xar_name); 54 | 55 | if (linker_options) { 56 | char *options[128]; 57 | int size = 0; 58 | 
retrieve_linker_options(xar_name, options, &size); 59 | printf("Linker options: "); 60 | for (int i = 0; i < size; i++) { 61 | printf("%s ", options[i]); 62 | free(options[i]); 63 | } 64 | printf("\n"); 65 | } 66 | 67 | free(xar_name); 68 | } 69 | free(archives[i]->buffer); 70 | free(archives[i]); 71 | } 72 | } 73 | 74 | fclose(stream); 75 | return 0; 76 | } 77 | -------------------------------------------------------------------------------- /subject/main.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | extern int power2(int x); 4 | 5 | int main() { 6 | printf("%d\n", power2(4)); 7 | return 0; 8 | } 9 | 10 | -------------------------------------------------------------------------------- /subject/power2.c: -------------------------------------------------------------------------------- 1 | int power2(int x) { 2 | return x * x; 3 | } 4 | 5 | --------------------------------------------------------------------------------