├── LICENSE.md ├── README.md ├── memory.c ├── process.py ├── requirements.txt └── startup_load.c /LICENSE.md: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2022, alex Yang 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # rwcompression 2 | GCC .data/.bss runtime loader and post-build data compressor 3 | 4 | - This tool provides the ability to compress/load rw image with gcc binaries, just like what commercial toolchain did (`Keil MDK`, `IAR EWARM`, etc) 5 | - Loader: `startup_load.c`, The only thing need to do is: 6 | 1. add `startup_load.c` to project 7 | 2. call `_mainCRTStartup` before `main` 8 | - Post-build: `process.py` 9 | - Everything should be done automatically, even without changing a single line in your link script 10 | 11 | --- 12 | 13 | solve python dependency by: 14 | 15 | ``` 16 | pip3 install -r requirements.txt 17 | ``` 18 | 19 | call `process.py` with a given elf file will generate a patched file `patched.elf` 20 | 21 | ``` 22 | python3 process.py firmware.elf 23 | ``` 24 | 25 | you can use `objcopy` to generate executable binary from `patched.elf` directly 26 | 27 | ``` 28 | objcopy -O binary -S patched.elf patched.bin 29 | ``` 30 | 31 | `patched.elf` also can be load & programmed by debuggers automatically 32 | -------------------------------------------------------------------------------- /memory.c: -------------------------------------------------------------------------------- 1 | /* 2 | *********************************************************************** 3 | * Copyright (c) 2023 alex Yang 4 | * 5 | * @file memory.c 6 | * @brief 此文件实现了libc runtime memory api的优化实现 (arm) 7 | * @history 8 | * Version Date Author Modification 9 | * V1.0.0 Aug-09-2023 null.yang create file 10 | * 11 | * 12 | *********************************************************************** 13 | */ 14 | 15 | #include "stddef.h" 16 | #include "stdint.h" 17 | #include "string.h" 18 | 19 | void *memcpy(void *dst, const void *src, size_t n) 20 | { 21 | uint8_t *dst_byte = (uint8_t *)dst; 22 | uint32_t *dst_word = (uint32_t *)dst_byte; 23 | const uint8_t *src_byte = (const uint8_t *)src; 24 | const uint32_t *src_word = (const uint32_t *)src_byte; 25 | 26 | /* make sure dst and src are in the same alignment */ 27 | if ((((uintptr_t)dst ^ (uintptr_t)src) & 0x03) == 0u) 28 | { 29 | if ((uintptr_t)dst & 0x03 || n < sizeof(*src_word)) 30 | { 31 | /* padding copy to aligned base address, 32 | * until cursor reaches the end */ 33 | while ((uintptr_t)dst_byte & 0x03 && n > 0) 34 | { 35 | *dst_byte++ = *src_byte++; 36 | n -= sizeof(*src_byte); 37 | }; 38 | } 39 | 40 | dst_word = (uint32_t *)dst_byte; 41 | src_word = (uint32_t *)src_byte; 42 | 43 | /* accelerated word copy from aligned base address */ 44 | while (n >= sizeof(*src_word)) 45 | { 46 | *dst_word++ = *src_word++; 47 | n -= sizeof(*src_word); 48 | }; 49 | 50 | dst_byte = (uint8_t *)dst_word; 51 | src_byte = (uint8_t *)src_word; 52 | }; 53 | 54 | /* unaligned byte copy left */ 55 | while (n > 0) 56 | { 57 | *dst_byte++ = *src_byte++; 58 | n -= sizeof(*src_byte); 59 | }; 60 | 61 | return dst; 62 | } 63 | 64 | void *memset(void *dst, int pattern, size_t size) 65 | { 66 | uint8_t c = (uint8_t)pattern; 67 | uint8_t *dst_byte = (uint8_t *)dst; 68 | uint32_t *dst_word = (uint32_t *)dst_byte; 69 | 70 | if (((uintptr_t)dst & 0x03) == 0u || size < sizeof(c) * 4) 71 | { 72 | while ((uintptr_t)dst_byte & 0x03) 73 | { 74 | *dst_byte++ = c; 75 | size -= sizeof(c); 76 | }; 77 | 78 | dst_word = (uint32_t *)dst_byte; 79 | 80 | while (size >= sizeof(c) * 4) 81 | { 82 | *dst_word++ = (uint32_t)((c << 24) | (c << 16) | (c << 8) | c); 83 | size -= sizeof(c) * 4; 84 | }; 85 | 86 | dst_byte = (uint8_t *)dst_word; 87 | }; 88 | 89 | while (size > 0) 90 | { 91 | *dst_byte++ = c; 92 | size -= sizeof(c); 93 | }; 94 | 95 | return dst; 96 | } 97 | -------------------------------------------------------------------------------- /process.py: -------------------------------------------------------------------------------- 1 | import sys, ctypes, struct, shutil 2 | from os.path import join, dirname 3 | from enum import Enum 4 | from elftools.elf.sections import Symbol 5 | from elftools.elf.elffile import ELFFile 6 | from prettytable import PrettyTable 7 | 8 | # Defines Compression Methods 9 | Method = Enum('CompresstionMethods', {'NO_COMPRESSION':0, 'BSS_SET_ZERO':1, 'RW_ZERO_RLE':2, 'RW_LZ77':3}) 10 | 11 | class LoadSegment(object): 12 | def __init__(self, index, segment): 13 | self.segindex = index 14 | self.segment = segment 15 | # load = (method, data, rw_sz, bss_sz,) 16 | self.load = (Method.NO_COMPRESSION, list(segment.data()), segment['p_filesz'], segment['p_memsz'] - segment['p_filesz']) 17 | 18 | # add rw segment 19 | __compressions = (self.__no_compress, self.__zero_rle_compress, self.__lz77) 20 | for compression in __compressions: 21 | (method, data, rw_sz) = compression(segment.data()) 22 | if rw_sz < segment['p_filesz']: 23 | self.load = (method, data, rw_sz, segment['p_memsz'] - segment['p_filesz']) 24 | 25 | def __no_compress(self, input): 26 | return (Method.NO_COMPRESSION, list(input), len(input)) 27 | 28 | def __zero_rle_compress(self, input): 29 | (count, output) = (0, []) 30 | for i, val in enumerate(input): 31 | if i == 0: 32 | output.append(val) 33 | count = 1 34 | else: 35 | if val == 0 and input[i - 1] == 0: 36 | if count < 255: 37 | count += 1 38 | else: 39 | output.append(count) 40 | output.append(0) 41 | count = 1 42 | else: 43 | if input[i - 1] == 0: 44 | output.append(count) 45 | output.append(val) 46 | count = 1 47 | if i == len(input) - 1 and val == 0: 48 | output.append(count) 49 | 50 | return (Method.RW_ZERO_RLE, output, len(output)) 51 | 52 | def __lz77(self, input): 53 | # FIXME: not supported yet, use NO_COMPRESSION instead 54 | return self.__no_compress(input) 55 | 56 | def patch(self, elf, prev): 57 | assert isinstance(elf, ELFFile), 'not a ELFFile!' 58 | 59 | # skip segment without rw image 60 | if 0 == self.segment['p_filesz']: 61 | print("skip patching segment without rw image at 0x%x" % (self.segment['p_vaddr'])) 62 | return self 63 | 64 | # define param 65 | ptr = 0 if prev is None else prev.segment['p_paddr'] 66 | vma = self.segment['p_vaddr'] 67 | lma = self.segment['p_paddr'] + 32 # 32 == sizeof(lhdr) 68 | memsz = self.segment['p_memsz'] 69 | method = self.load[0] 70 | data = self.load[1] 71 | rw_sz = self.load[2] 72 | bss_sz = self.load[3] 73 | 74 | # generate lhdr in segment 75 | ''' 76 | typedef struct lhdr { 77 | uint32_t prev; /* previous lhdr lma */ 78 | uint32_t method; /* Segment method */ 79 | uint32_t vaddr; /* Segment virtual address */ 80 | uint32_t paddr; /* Segment physical address */ 81 | uint32_t memsz; /* Segment size in file */ 82 | uint32_t rw_sz; /* Segment size in file (.data) */ 83 | uint32_t bss_sz; /* Segment size in file (.bss) */ 84 | uint32_t reserved; /* reserved datafield */ 85 | } lhdr_t; 86 | ''' 87 | lhdr = struct.pack('IIIIIIII', 88 | ptr, 89 | method.value, 90 | vma, 91 | lma, 92 | memsz, 93 | rw_sz, 94 | bss_sz, 95 | 0) 96 | 97 | # modify filesz in section header for debug compability (shdr) 98 | for section in elf.iter_sections(type='SHT_PROGBITS'): 99 | if self.segment.section_in_segment(section): 100 | if section['sh_addr'] == self.segment['p_vaddr']: 101 | __section_name = section.name 102 | __section_index = elf.get_section_index(__section_name) 103 | __section_offset = elf._section_offset(__section_index) + 20 # 20 == offset of 'sh_size' in shdr 104 | __section_filesz = ctypes.c_uint32(rw_sz + len(lhdr)) 105 | elf.stream.seek(__section_offset, 0) 106 | elf.stream.write(__section_filesz) 107 | 108 | # modify filesz in segment header for debug compability (phdr) 109 | __segment_index = self.segindex 110 | __segment_offset = elf._segment_offset(__segment_index) + 16 # 16 == offset of 'p_filesz' in phdr 111 | __segment_filesz = rw_sz + len(lhdr) 112 | elf.stream.seek(__segment_offset, 0) 113 | elf.stream.write(ctypes.c_uint32(__segment_filesz)) 114 | 115 | # patch segment image 116 | __image_start = self.segment['p_offset'] 117 | __image_end = self.segment['p_filesz'] + __image_start 118 | elf.stream.seek(__image_start, 0) 119 | elf.stream.write(bytearray(lhdr)) 120 | elf.stream.write(bytearray(data)) 121 | assert elf.stream.tell() <= __image_end, '%s compression at segment[%s]: "%s" not works, filesz %s > %s cause segment data overflow!' % (method.name, __segment_index, __section_name, hex(__segment_filesz), hex(self.segment['p_filesz'])) 122 | 123 | # return self for info collection 124 | return self 125 | 126 | def __merge_segments(LoadSegments): 127 | segments = {} 128 | for LoadSegment in LoadSegments: 129 | if (LoadSegment.load[0] == Method.NO_COMPRESSION): 130 | lma = LoadSegment.segment['p_paddr'] 131 | if lma in segments: 132 | segments[lma].append(LoadSegment) 133 | else: 134 | segments[lma] = [LoadSegment] 135 | for lma, sections in segments.items(): 136 | if len(sections) > 1: 137 | current_base, current_offset = sections[0].segment['p_vaddr'], sections[0].segment['p_memsz'] 138 | for section in sections: 139 | if (section.segment['p_vaddr'] == current_base + current_offset): 140 | # merge sections mapped with the same segment 141 | current_offset += section.segment['p_memsz'] 142 | method = sections[0].load[0] 143 | data = sections[0].load[1] 144 | rw_sz = sections[0].load[2] + section.segment['p_filesz'] 145 | bss_sz = sections[0].load[3] + section.segment['p_memsz'] - section.segment['p_filesz'] 146 | sections[0].load = (method, data, rw_sz, bss_sz) 147 | segments[lma].remove(section) 148 | LoadSegments.remove(section) 149 | return LoadSegments 150 | 151 | def __find_symbol(elf, name): 152 | assert isinstance(elf, ELFFile), 'not a ELFFile!' 153 | for section in elf.iter_sections(type='SHT_SYMTAB'): 154 | symbols = section.get_symbol_by_name(name) 155 | if symbols is not None: 156 | return symbols[0] if len(symbols) == 1 else None 157 | else: 158 | return None; 159 | return None 160 | 161 | def __patch(elf, prev): 162 | assert isinstance(elf, ELFFile), 'not a ELFFile!' 163 | 164 | header = __find_symbol(elf, '__load_header') 165 | assert isinstance(header, Symbol), 'Symbol \'__load_header\' not found or duplicated!' 166 | 167 | # generate lhdr at __load_header 168 | ''' 169 | typedef struct lhdr { 170 | uint32_t prev; /* previous lhdr lma */ 171 | uint32_t method; /* Segment method */ 172 | uint32_t vaddr; /* Segment virtual address */ 173 | uint32_t paddr; /* Segment physical address */ 174 | uint32_t memsz; /* Segment size in file */ 175 | uint32_t rw_sz; /* Segment size in file (.data) */ 176 | uint32_t bss_sz; /* Segment size in file (.bss) */ 177 | uint32_t reserved; /* reserved datafiled */ 178 | } lhdr_t; 179 | ''' 180 | ptr = 0 if prev is None else prev.segment['p_paddr'] 181 | offset = next(elf.address_offsets(header.entry['st_value'], size=0x20), None) 182 | data = struct.pack('IIIIIIII', 183 | ptr, 184 | Method.NO_COMPRESSION.value, 185 | header.entry['st_value'], 186 | header.entry['st_value'], 187 | 0, 188 | 0, 189 | 0, 190 | 0) 191 | # patch elf 192 | elf.stream.seek(offset) 193 | elf.stream.write(bytearray(data)) 194 | 195 | def __report(blocks): 196 | report = PrettyTable() 197 | report.title = 'RW/BSS Report:' 198 | report.field_names = ['index', 'type', 'vma', 'lma', 'newsz', 'oldsz', 'ratio', 'method'] 199 | report.align = 'l' 200 | 201 | for index in range(len(blocks)): 202 | block = blocks[index] 203 | method = block.load[0] 204 | data = block.load[1] 205 | rw_sz = block.load[2] 206 | bss_sz = block.load[3] 207 | vma = block.segment['p_vaddr'] 208 | lma = block.segment['p_paddr'] 209 | 210 | if rw_sz > 0: 211 | newsz = rw_sz 212 | oldsz = block.segment['p_filesz'] 213 | ratio = "{:.2%}".format(newsz/oldsz) 214 | report.add_row([index, 'rw', hex(vma), hex(lma), newsz, oldsz, ratio, method.name]) 215 | 216 | if bss_sz > 0: 217 | newsz = 0 218 | oldsz = bss_sz 219 | ratio = "{:.2%}".format(newsz/oldsz) 220 | report.add_row([index, 'bss', hex(vma), hex(lma), newsz, oldsz, ratio, Method.BSS_SET_ZERO.name]) 221 | 222 | print(report) if len(blocks) else None 223 | 224 | def process(elffile): 225 | with open(elffile, 'r+b') as fin: 226 | (elf, info, prev) = (ELFFile(fin), [], None) 227 | # 1st pass: match segments that need to load 228 | segments = [] 229 | for index in range(elf.num_segments()): 230 | segment = elf.get_segment(index) 231 | if segment['p_type'] == 'PT_LOAD' and segment['p_vaddr'] != segment['p_paddr']: 232 | segments.append(LoadSegment(index, segment)) 233 | # 2nd pass: merge segments point to sections with continuous memory 234 | segments = __merge_segments(segments) 235 | # 3rd pass: patch segments 236 | for segment in segments: 237 | prev = segment.patch(elf, prev) 238 | info.append(prev) 239 | __patch(elf, prev) 240 | __report(info) 241 | 242 | if __name__ == '__main__': 243 | # Prepare backup image 244 | input = sys.argv[1] 245 | output = sys.argv[2] if len(sys.argv) == 3 else join(dirname(input), 'patched.elf') 246 | shutil.copy(input, output) 247 | 248 | # Process image 249 | process(output) 250 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | prettytable==0.7.2 2 | pyelftools==0.28 3 | -------------------------------------------------------------------------------- /startup_load.c: -------------------------------------------------------------------------------- 1 | /* 2 | *********************************************************************** 3 | * Copyright (c) 2022 alex Yang 4 | * 5 | * @file startup_load.c 6 | * @brief 此文件为启动加载程序, 负责在进入用户main()函数前准备c runtime 7 | * @history 8 | * Version Date Author Modification 9 | * V1.0.0 May-15-2022 null.yang create file 10 | * 11 | * 12 | *********************************************************************** 13 | */ 14 | 15 | #include "stddef.h" 16 | #include "stdint.h" 17 | #include "string.h" 18 | 19 | #if defined ( __CC_ARM ) 20 | #defien __main __main 21 | #elif defined ( __GNUC__ ) 22 | #define __main _mainCRTStartup 23 | #elif defined ( __ICCARM__ ) 24 | #define __main __iar_program_start 25 | #endif 26 | 27 | #define LOAD_HDR_MAGIC (0xFEE1DEAD) 28 | 29 | typedef enum load_method { 30 | NO_COMPRESSION = 0, 31 | BSS_SET_ZERO = 1, 32 | RW_ZERO_RLE = 2, 33 | RW_LZ77 = 3, 34 | } load_method_t; 35 | 36 | typedef struct lhdr { 37 | uint32_t prev; /* previous lhdr lma */ 38 | uint32_t method; /* Segment method */ 39 | uint32_t vaddr; /* Segment virtual address */ 40 | uint32_t paddr; /* Segment physical address */ 41 | uint32_t memsz; /* Segment size in file */ 42 | uint32_t rw_sz; /* Segment size in file (.data) */ 43 | uint32_t bss_sz; /* Segment size in file (.bss) */ 44 | uint32_t reserved; /* reserved datafield */ 45 | } lhdr_t; 46 | 47 | __attribute__((used)) static const lhdr_t __load_header; 48 | 49 | __attribute__((always_inline, used)) inline 50 | static void __load_memcpy(void *dst, void *src, size_t size) 51 | { 52 | uint8_t *to = (uint8_t *)dst; 53 | uint8_t *from = (uint8_t *)src; 54 | 55 | while (size > 0) 56 | { 57 | *to++ = *from++; 58 | size--; 59 | } 60 | } 61 | 62 | __attribute__((always_inline, used)) inline 63 | static void __load_memset(void *dst, uint8_t pattern, size_t size) 64 | { 65 | uint8_t *to = (uint8_t *)dst; 66 | 67 | while (size > 0) 68 | { 69 | *to++ = pattern; 70 | size--; 71 | } 72 | } 73 | 74 | __attribute__((always_inline, used)) inline 75 | static void __load_zero_rle(uint8_t *vma, uint8_t *lma, size_t memsize) 76 | { 77 | uint8_t *to = (uint8_t *)vma; 78 | uint8_t *from = (uint8_t *)lma; 79 | 80 | while (memsize > 0) 81 | { 82 | if (*from != 0) 83 | { 84 | *to++ = *from++; 85 | memsize--; 86 | } 87 | else if (*from == 0) 88 | { 89 | uint8_t val = *from++; 90 | uint8_t len = *from++; 91 | while (len-- > 0) 92 | { 93 | *to++ = val; 94 | memsize--; 95 | } 96 | } 97 | } 98 | } 99 | 100 | __attribute__((always_inline, used)) inline 101 | static int __load_segment(lhdr_t *const lhdr) 102 | { 103 | if (NULL == lhdr) 104 | { 105 | return -1; 106 | } 107 | 108 | uint8_t *vma = (uint8_t *)lhdr->vaddr; 109 | uint8_t *lma = (uint8_t *)lhdr->paddr; 110 | size_t mem_sz = (size_t)lhdr->memsz; /* total size after decompression (.data + .bss) */ 111 | size_t bss_sz = (size_t)lhdr->bss_sz; /* size of .bss part after decompression */ 112 | size_t data_sz = (size_t)mem_sz - bss_sz; /* size of .data part after decompression */ 113 | 114 | /* load .data */ 115 | switch (lhdr->method) 116 | { 117 | case NO_COMPRESSION: 118 | __load_memcpy(vma, lma, data_sz); 119 | break; 120 | case RW_ZERO_RLE: 121 | __load_zero_rle(vma, lma, data_sz); 122 | break; 123 | case BSS_SET_ZERO: 124 | break; 125 | case RW_LZ77: /* no break, return error immediately */ 126 | default: 127 | return -1; 128 | } 129 | 130 | /* load .bss */ 131 | __load_memset((uint8_t *)(vma + data_sz), 0x00, bss_sz); 132 | 133 | return 0; 134 | } 135 | 136 | __attribute__((always_inline, used)) inline 137 | static int __load_program(lhdr_t *lhdr) 138 | { 139 | if (NULL == lhdr) 140 | { 141 | return -1; 142 | } 143 | 144 | do { 145 | lhdr = (lhdr_t *)lhdr->prev; 146 | if (__load_segment(lhdr)) 147 | { 148 | return -2; 149 | } 150 | } while (lhdr); 151 | 152 | return 0; 153 | } 154 | 155 | /* 156 | * Make sure _mainCRTStartup() procedure is stack-less, 157 | * (Subroutines are all force inlined in call graph) 158 | * Otherwise memset .bss segment to zero may corrupt call frame itself 159 | */ 160 | void _mainCRTStartup(void); 161 | void _mainCRTStartup(void) 162 | { 163 | extern int main(void); 164 | 165 | #if defined (__arm__) || defined(__thumb__) 166 | __asm ( 167 | "ldr r0, =__load_header\r\n" 168 | "bl __load_program\r\n" 169 | "b main\r\n" 170 | ); 171 | #elif defined (__riscv) 172 | __asm ( 173 | "la a0, __load_header\r\n" 174 | "call __load_program\r\n" 175 | "li a0, 0\r\n" 176 | "li a1, 0\r\n" 177 | "call main" 178 | ); 179 | #else 180 | #error "arch not supported!" 181 | #endif 182 | } 183 | --------------------------------------------------------------------------------