├── .gitignore ├── .gitmodules ├── CMakeLists.txt ├── LICENSE.TXT ├── README.md ├── bin2hex ├── bootrom ├── async_task.c ├── async_task.h ├── bit_functions.S ├── bootrom.ld ├── bootrom_crc32.h ├── bootrom_main.c ├── bootrom_misc.S ├── bootrom_rt0.S ├── info_uf2.txt ├── mufplib-double.S ├── mufplib.S ├── program_flash_generic.c ├── program_flash_generic.h ├── resets.h ├── sftable.h ├── usb_boot_device.c ├── usb_boot_device.h ├── virtual_disk.c └── welcome.html ├── generator ├── CMakeLists.txt └── main.c ├── test ├── CMakeLists.txt ├── bit_functions_test.c ├── mem_functions_test.c ├── tc_rom_double.c ├── tc_rom_float.c └── tictoc.h └── usb_device_tiny ├── runtime.c ├── runtime.h ├── scsi.h ├── scsi_ir.h ├── usb_common.h ├── usb_device.c ├── usb_device.h ├── usb_device_private.h ├── usb_msc.c ├── usb_msc.h ├── usb_stream_helper.c ├── usb_stream_helper.h └── virtual_disk.h /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | cmake-* 3 | generated.h 4 | git_info.h 5 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "pico_sdk"] 2 | path = pico_sdk 3 | url = https://github.com/raspberrypi/pico-sdk.git 4 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.12) 2 | #set(CMAKE_C_STANDARD 11) 3 | 4 | # Pull in PICO SDK (must be before project) 5 | include(pico_sdk/pico_sdk_init.cmake) 6 | 7 | project(pico_bootrom) 8 | 9 | if (NOT INCLUDE_TESTS) 10 | set(PICO_BARE_METAL 1) 11 | else() 12 | # SDK versions conflict with code to be tested 13 | set(SKIP_PICO_BIT_OPS 1) 14 | endif() 15 | 16 | # Add pico targets to the build 17 | pico_sdk_init() 18 | 19 | # We want to generate from WELCOME.HTM and 20 | include(ExternalProject) 
21 | ExternalProject_Add(generator 22 | PREFIX generator 23 | SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR}/generator 24 | BINARY_DIR ${CMAKE_BINARY_DIR}/generator 25 | DOWNLOAD_COMMAND "" 26 | INSTALL_COMMAND "" 27 | ) 28 | 29 | add_executable(generate IMPORTED) 30 | # todo is there a better way - this breaks on windows I presume 31 | set_property(TARGET generate PROPERTY IMPORTED_LOCATION ${CMAKE_BINARY_DIR}/generator/generate) 32 | add_dependencies(generate generator) 33 | 34 | set(GIT_INFO_H ${CMAKE_CURRENT_BINARY_DIR}/git_info.h) 35 | set(GENERATED_H ${CMAKE_CURRENT_BINARY_DIR}/generated.h) 36 | 37 | # order matches original makefile 38 | add_executable(bootrom 39 | bootrom/bootrom_rt0.S 40 | bootrom/bit_functions.S 41 | bootrom/bootrom_main.c 42 | bootrom/bootrom_misc.S 43 | bootrom/program_flash_generic.c 44 | bootrom/usb_boot_device.c 45 | bootrom/virtual_disk.c 46 | bootrom/async_task.c 47 | bootrom/mufplib.S 48 | bootrom/mufplib-double.S 49 | usb_device_tiny/runtime.c 50 | usb_device_tiny/usb_device.c 51 | usb_device_tiny/usb_msc.c 52 | usb_device_tiny/usb_stream_helper.c 53 | ) 54 | 55 | target_include_directories(bootrom PRIVATE bootrom usb_device_tiny ${CMAKE_CURRENT_BINARY_DIR}) 56 | 57 | add_custom_target(update_git_info DEPENDS ${GIT_INFO_H}) 58 | 59 | add_custom_command(OUTPUT ${GIT_INFO_H} 60 | COMMENT "Generating ${GIT_INFO_H}" 61 | DEPENDS ${CMAKE_CURRENT_LIST_DIR}/.git/index 62 | COMMAND ${CMAKE_COMMAND} -E echo_append " #define GIT_REV 0x" > ${GIT_INFO_H} 63 | COMMAND git rev-parse --short=8 HEAD >> ${GIT_INFO_H} 64 | ) 65 | 66 | add_custom_target(generate_header DEPENDS ${GENERATED_H}) 67 | add_custom_command(OUTPUT ${GENERATED_H} 68 | COMMENT "Generating ${GENERATED_H}" 69 | DEPENDS bootrom/info_uf2.txt bootrom/welcome.html usb_device_tiny/scsi_ir.h 70 | COMMAND generate ${CMAKE_CURRENT_LIST_DIR}/bootrom >${GENERATED_H} 71 | ) 72 | 73 | add_dependencies(bootrom update_git_info) 74 | add_dependencies(bootrom generate_header) 75 | 76 | # we need these 
for both compile and link due to flto 77 | function(target_cl_options option) 78 | target_compile_options(bootrom PRIVATE ${option}) 79 | target_link_options(bootrom PRIVATE ${option}) 80 | endfunction() 81 | 82 | target_cl_options("-fno-jump-tables") 83 | target_cl_options("-g") 84 | target_cl_options("-Os") 85 | target_cl_options("-ffunction-sections") 86 | target_cl_options("-fdata-sections") 87 | target_cl_options("-flto") 88 | target_cl_options("-nostdlib") 89 | target_cl_options("-nodefaultlibs") 90 | 91 | target_link_options(bootrom PRIVATE "LINKER:--build-id=none") 92 | target_link_options(bootrom PRIVATE "LINKER:--undefined=isr_irq5") 93 | target_link_options(bootrom PRIVATE "LINKER:--gc-sections") 94 | #target_link_options(bootrom PRIVATE "--save-temps") 95 | target_link_options(bootrom PRIVATE "--specs=nosys.specs") 96 | target_link_options(bootrom PRIVATE "-nostartfiles") 97 | 98 | #target_compile_options(bootrom PRIVATE --save-temps) 99 | target_cl_options("-Wall") 100 | target_cl_options("-Wextra") 101 | # Have seen some awful code without this: 102 | target_cl_options("--param") 103 | target_cl_options("max-completely-peel-times=4") 104 | 105 | target_compile_definitions(bootrom PRIVATE 106 | NDEBUG 107 | USE_PICOBOOT 108 | USB_MAX_ENDPOINTS=5 109 | COMPRESS_TEXT 110 | GENERAL_SIZE_HACKS 111 | BOOTROM_ONLY_SIZE_HACKS 112 | USE_16BIT_ROM_FUNCS 113 | USE_BOOTROM_GPIO 114 | 115 | # for 116 | USE_HW_DIV 117 | 118 | USE_POPCOUNT32 119 | USE_CLZ32 120 | USE_CTZ32 121 | USE_REVERSE32 122 | ) 123 | 124 | target_link_options(bootrom PRIVATE "LINKER:--script=${CMAKE_CURRENT_LIST_DIR}/bootrom/bootrom.ld") 125 | set_target_properties(bootrom PROPERTIES LINK_DEPENDS ${CMAKE_CURRENT_LIST_DIR}/bootrom/bootrom.ld) 126 | target_link_libraries(bootrom PRIVATE 127 | hardware_resets 128 | hardware_regs 129 | hardware_structs 130 | pico_platform 131 | hardware_sync 132 | boot_uf2_headers 133 | boot_picoboot_headers 134 | ) 135 | 136 | function(pico_add_h32_output TARGET) 
137 | find_package (Python3 REQUIRED COMPONENTS Interpreter) 138 | add_custom_command(TARGET ${TARGET} POST_BUILD 139 | COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_LIST_DIR}/bin2hex -32 ${TARGET}.bin ${TARGET}.h32) 140 | endfunction() 141 | 142 | pico_add_bin_output(bootrom) 143 | pico_add_dis_output(bootrom) 144 | pico_add_map_output(bootrom) 145 | pico_add_hex_output(bootrom) 146 | pico_add_h32_output(bootrom) 147 | 148 | if (INCLUDE_TESTS) 149 | add_subdirectory(test) 150 | endif() 151 | 152 | 153 | -------------------------------------------------------------------------------- /LICENSE.TXT: -------------------------------------------------------------------------------- 1 | NOTE: this license applies to the contents of this repository, EXCLUDING the contents of the files bootrom/mufplib.S 2 | and bootrom/mufplib-double.S, which are licensed separately: 3 | 4 | Copyright 2020 (c) 2020 Raspberry Pi (Trading) Ltd. 5 | 6 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the 7 | following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following 10 | disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following 13 | disclaimer in the documentation and/or other materials provided with the distribution. 14 | 15 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products 16 | derived from this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, 19 | INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 23 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24 | THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | This is the B2 version of the RP2040 bootrom. 2 | 3 | The version on the chip was built in _Debug_ mode using GCC 9.3.1 (GNU Arm Embedded Toolchain 9-2020-q2-update). 4 | 5 | Note the GIT revision info (included in the bootrom) on chip does not match the GIT revision of this 6 | branch. Additionally, certain `DMB` instructions are encoded as `DMB SY` vs `DMB ISH` when building 7 | with current SDKs although this has no functional effect on the RP2040. 8 | -------------------------------------------------------------------------------- /bin2hex: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import sys 4 | import re 5 | 6 | if __name__ == "__main__": 7 | if len(sys.argv) < 3: 8 | sys.exit("Usage: bin2hex [-] \ne.g. 
bin2hex -32 rom.bin rom.hex") 9 | size = 32 10 | argn = 1; 11 | m = re.match(r"-(\d+)", sys.argv[argn]) 12 | if m: 13 | size = int(m.group(1)) 14 | argn = argn + 1 15 | 16 | if size <= 0 or size % 8 != 0: 17 | sys.exit("Size must be a multiple of 8 (got {})".format(size)) 18 | 19 | if sys.argv[argn] == '-': 20 | ifile = sys.stdin.buffer 21 | else: 22 | ifile = open(sys.argv[argn], "rb") 23 | argn = argn + 1 24 | if sys.argv[argn] == '-': 25 | ofile = sys.stdout 26 | else: 27 | ofile = open(sys.argv[argn], "w") 28 | argn = argn + 1 29 | 30 | bytes_per_word = size // 8 31 | 32 | while True: 33 | bytes = ifile.read(bytes_per_word) 34 | if not bytes: 35 | break 36 | word = '' 37 | for i in range(bytes_per_word): 38 | word = ("%02X" % (bytes[i] if i < len(bytes) else 0)) + word 39 | ofile.write("%s\n" % word) 40 | 41 | ifile.close() 42 | ofile.close() 43 | -------------------------------------------------------------------------------- /bootrom/async_task.c: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2020 Raspberry Pi (Trading) Ltd. 
3 | * 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | */ 6 | 7 | #include 8 | #include "runtime.h" 9 | #include "program_flash_generic.h" 10 | #include "async_task.h" 11 | #include "usb_boot_device.h" 12 | #include "usb_msc.h" 13 | #include "boot/picoboot.h" 14 | #include "hardware/sync.h" 15 | 16 | //#define NO_ASYNC 17 | //#define NO_ROM_READ 18 | 19 | CU_REGISTER_DEBUG_PINS(flash) 20 | 21 | static uint32_t _do_flash_enter_cmd_xip(); 22 | static uint32_t _do_flash_exit_xip(); 23 | static uint32_t _do_flash_erase_sector(uint32_t addr); 24 | static uint32_t _do_flash_erase_range(uint32_t addr, uint32_t len); 25 | static uint32_t _do_flash_page_program(uint32_t addr, uint8_t *data); 26 | static uint32_t _do_flash_page_read(uint32_t addr, uint8_t *data); 27 | static bool _is_address_safe_for_vectoring(uint32_t addr); 28 | 29 | // keep table of flash function pointers in case RPI user wants to redirect them 30 | static const struct flash_funcs { 31 | uint32_t size; 32 | uint32_t (*do_flash_enter_cmd_xip)(); 33 | uint32_t (*do_flash_exit_xip)(); 34 | uint32_t (*do_flash_erase_sector)(); 35 | uint32_t (*do_flash_erase_range)(uint32_t addr, uint32_t size); 36 | uint32_t (*do_flash_page_program)(uint32_t addr, uint8_t *data); 37 | uint32_t (*do_flash_page_read)(uint32_t addr, uint8_t *data); 38 | } default_flash_funcs = { 39 | .size = sizeof(struct flash_funcs), 40 | _do_flash_enter_cmd_xip, 41 | _do_flash_exit_xip, 42 | _do_flash_erase_sector, 43 | _do_flash_erase_range, 44 | _do_flash_page_program, 45 | _do_flash_page_read, 46 | }; 47 | 48 | const struct flash_funcs *flash_funcs; 49 | 50 | static uint32_t _do_flash_enter_cmd_xip() { 51 | usb_warn("flash ennter cmd XIP\n"); 52 | flash_enter_cmd_xip(); 53 | return 0; 54 | } 55 | 56 | static uint32_t _do_flash_exit_xip() { 57 | usb_warn("flash exit XIP\n"); 58 | DEBUG_PINS_SET(flash, 2); 59 | connect_internal_flash(); 60 | DEBUG_PINS_SET(flash, 4); 61 | flash_exit_xip(); 62 | DEBUG_PINS_CLR(flash, 6); 63 | #ifdef 
USE_BOOTROM_GPIO 64 | gpio_setup(); 65 | #endif 66 | return 0; 67 | } 68 | 69 | static uint32_t _do_flash_erase_sector(uint32_t addr) { 70 | usb_warn("erasing flash sector @%08x\n", (uint) addr); 71 | DEBUG_PINS_SET(flash, 2); 72 | flash_sector_erase(addr - XIP_MAIN_BASE); 73 | DEBUG_PINS_CLR(flash, 2); 74 | return 0; 75 | } 76 | 77 | static uint32_t _do_flash_erase_range(uint32_t addr, uint32_t len) { 78 | uint32_t end = addr + len; 79 | uint32_t ret = PICOBOOT_OK; 80 | while (addr < end && !ret) { 81 | ret = flash_funcs->do_flash_erase_sector(addr); 82 | addr += FLASH_SECTOR_ERASE_SIZE; 83 | } 84 | return ret; 85 | } 86 | 87 | static uint32_t _do_flash_page_program(uint32_t addr, uint8_t *data) { 88 | usb_warn("writing flash page @%08x\n", (uint) addr); 89 | DEBUG_PINS_SET(flash, 4); 90 | flash_page_program(addr - XIP_MAIN_BASE, data); 91 | DEBUG_PINS_CLR(flash, 4); 92 | // todo set error result 93 | return 0; 94 | } 95 | 96 | static uint32_t _do_flash_page_read(uint32_t addr, uint8_t *data) { 97 | DEBUG_PINS_SET(flash, 4); 98 | usb_warn("reading flash page @%08x\n", (uint) addr); 99 | flash_read_data(addr - XIP_MAIN_BASE, data, FLASH_PAGE_SIZE); 100 | DEBUG_PINS_CLR(flash, 4); 101 | // todo set error result 102 | return 0; 103 | } 104 | 105 | static bool _is_address_safe_for_vectoring(uint32_t addr) { 106 | // note we are inclusive at the end to save arithmetic, since we are always checking non-empty ranges 107 | return is_address_ram(addr) && 108 | (addr < FLASH_VALID_BLOCKS_BASE || addr > FLASH_VALID_BLOCKS_BASE + FLASH_BITMAPS_SIZE); 109 | } 110 | 111 | static uint8_t _last_mutation_source; 112 | 113 | // NOTE for simplicity this returns error codes from PICOBOOT 114 | static uint32_t _execute_task(struct async_task *task) { 115 | uint32_t ret; 116 | if (watchdog_rebooting()) { 117 | return PICOBOOT_REBOOTING; 118 | } 119 | uint type = task->type; 120 | if (type & AT_VECTORIZE_FLASH) { 121 | if (task->transfer_addr & 1u) { 122 | return PICOBOOT_BAD_ALIGNMENT;
123 | } 124 | if (_is_address_safe_for_vectoring(task->transfer_addr) && 125 | _is_address_safe_for_vectoring(task->transfer_addr + sizeof(struct flash_funcs))) { 126 | memcpy((void *) task->transfer_addr, &default_flash_funcs, sizeof(struct flash_funcs)); 127 | flash_funcs = (struct flash_funcs *) task->transfer_addr; 128 | } else { 129 | return PICOBOOT_INVALID_ADDRESS; 130 | } 131 | } 132 | if (type & AT_EXCLUSIVE) { 133 | // we do this in _execute_task, so we know we aren't executing any virtual_disk_queue tasks at this moment 134 | usb_warn("SET EXCLUSIVE ACCESS %d\n", task->exclusive_param); 135 | async_disable_queue(&virtual_disk_queue, task->exclusive_param); 136 | if (task->exclusive_param == EXCLUSIVE_AND_EJECT) { 137 | msc_eject(); 138 | } 139 | } 140 | if (type & AT_EXIT_XIP) { 141 | ret = flash_funcs->do_flash_exit_xip(); 142 | if (ret) return ret; 143 | } 144 | if (type & AT_EXEC) { 145 | usb_warn("exec %08x\n", (uint) task->transfer_addr); 146 | // scary but true; note callee must not overflow our stack (note also we reuse existing field task->transfer_addr to save code/data space) 147 | (((void (*)()) (task->transfer_addr | 1u)))(); 148 | } 149 | if (type & (AT_WRITE | AT_FLASH_ERASE)) { 150 | if (task->check_last_mutation_source && _last_mutation_source != task->source) { 151 | return PICOBOOT_INTERLEAVED_WRITE; 152 | } 153 | _last_mutation_source = task->source; 154 | } 155 | if (type & AT_FLASH_ERASE) { 156 | usb_warn("request flash erase at %08x+%08x\n", (uint) task->erase_addr, (uint) task->erase_size); 157 | // todo maybe remove to save space 158 | if (task->erase_addr & (FLASH_SECTOR_ERASE_SIZE - 1)) return PICOBOOT_BAD_ALIGNMENT; 159 | if (task->erase_size & (FLASH_SECTOR_ERASE_SIZE - 1)) return PICOBOOT_BAD_ALIGNMENT; 160 | if (!(is_address_flash(task->erase_addr) && is_address_flash(task->erase_addr + task->erase_size))) { 161 | return PICOBOOT_INVALID_ADDRESS; 162 | } 163 | ret = flash_funcs->do_flash_erase_range(task->erase_addr,
task->erase_size); 164 | if (ret) return ret; 165 | } 166 | bool direct_access = false; 167 | if (type & (AT_WRITE | AT_READ)) { 168 | if ((is_address_ram(task->transfer_addr) && is_address_ram(task->transfer_addr + task->data_length)) 169 | #ifndef NO_ROM_READ 170 | || (!(type & AT_WRITE) && is_address_rom(task->transfer_addr) && 171 | is_address_rom(task->transfer_addr + task->data_length)) 172 | #endif 173 | ) { 174 | direct_access = true; 175 | } else if ((is_address_flash(task->transfer_addr) && 176 | is_address_flash(task->transfer_addr + task->data_length))) { 177 | // flash 178 | if (task->transfer_addr & (FLASH_PAGE_SIZE - 1)) return PICOBOOT_BAD_ALIGNMENT; 179 | } else { 180 | // bad address 181 | return PICOBOOT_INVALID_ADDRESS; 182 | } 183 | if (type & AT_WRITE) { 184 | if (direct_access) { 185 | usb_warn("writing %08x +%04x\n", (uint) task->transfer_addr, (uint) task->data_length); 186 | uint32_t ff = (uintptr_t) flash_funcs; 187 | if (MAX(ff, task->transfer_addr) < 188 | MIN(ff + sizeof(struct flash_funcs), task->transfer_addr + task->data_length)) { 189 | usb_warn("RAM write overlaps vectors, reverting them to ROM\n"); 190 | flash_funcs = &default_flash_funcs; 191 | } 192 | memcpy((void *) task->transfer_addr, task->data, task->data_length); 193 | } else { 194 | assert(task->data_length <= FLASH_PAGE_SIZE); 195 | ret = flash_funcs->do_flash_page_program(task->transfer_addr, task->data); 196 | if (ret) return ret; 197 | } 198 | } 199 | if (type & AT_READ) { 200 | if (direct_access) { 201 | usb_warn("reading %08x +%04x\n", (uint) task->transfer_addr, (uint) task->data_length); 202 | memcpy(task->data, (void *) task->transfer_addr, task->data_length); 203 | } else { 204 | assert(task->data_length <= FLASH_PAGE_SIZE); 205 | ret = flash_funcs->do_flash_page_read(task->transfer_addr, task->data); 206 | if (ret) return ret; 207 | } 208 | } 209 | if (type & AT_ENTER_CMD_XIP) { /* NOTE(review): by brace nesting this check sits inside the (AT_WRITE | AT_READ) block opened above, so it only runs for tasks that also read or write - confirm this is intended */ 210 | ret = flash_funcs->do_flash_enter_cmd_xip(); 211 | if (ret) return ret; 212
| } 213 | } 214 | return PICOBOOT_OK; 215 | } 216 | 217 | // just put this here in case it is worth noinlining - not atm 218 | static void _task_copy(struct async_task *to, struct async_task *from) { 219 | //*to = *from; 220 | memcpy(to, from, sizeof(struct async_task)); 221 | } 222 | 223 | void reset_task(struct async_task *task) { 224 | memset0(task, sizeof(struct async_task)); 225 | } 226 | 227 | void queue_task(struct async_task_queue *queue, struct async_task *task, async_task_callback callback) { 228 | task->callback = callback; 229 | #ifdef ASYNC_TASK_REQUIRE_TASK_CALLBACK 230 | assert(callback); 231 | #endif 232 | assert(!task->result); // convention is that it is zero, so try to catch a missing reset 233 | #ifdef NO_ASYNC 234 | task->result = _execute_task(task); 235 | _call_task_complete(task); /* NOTE(review): _call_task_complete is defined further down in this file with no earlier declaration visible here, so this NO_ASYNC path presumably needs a forward declaration to build - confirm */ 236 | #else 237 | if (queue->full) { 238 | usb_warn("overwriting already queued task for queue %p\n", queue); 239 | } 240 | _task_copy(&queue->task, task); 241 | queue->full = true; 242 | __sev(); 243 | #endif 244 | } 245 | 246 | static inline void _call_task_complete(struct async_task *task) { 247 | #ifdef ASYNC_TASK_REQUIRE_TASK_CALLBACK 248 | task->callback(task); 249 | #else 250 | if (task->callback) task->callback(task); 251 | #endif 252 | } 253 | 254 | bool dequeue_task(struct async_task_queue *queue, struct async_task *task_out) { 255 | #ifdef NO_ASYNC 256 | return false; 257 | #else 258 | bool have_task = false; 259 | uint32_t save = save_and_disable_interrupts(); 260 | __mem_fence_acquire(); 261 | if (queue->full) { 262 | _task_copy(task_out, &queue->task); 263 | queue->full = false; 264 | have_task = true; 265 | } 266 | restore_interrupts(save); 267 | return have_task; 268 | #endif 269 | } 270 | 271 | void execute_task(struct async_task_queue *queue, struct async_task *task) { 272 | if (queue->disable) 273 | task->result = 1; // todo better code (this is fine for now since we only ever disable virtual_disk queue which only cares whether or not result is 0 274
| else 275 | task->result = _execute_task(task); 276 | uint32_t save = save_and_disable_interrupts(); 277 | _call_task_complete(task); 278 | restore_interrupts(save); 279 | } 280 | 281 | struct async_task_queue virtual_disk_queue; 282 | 283 | #ifndef NDEBUG 284 | static bool _worker_started; 285 | #endif 286 | 287 | static struct async_task _worker_task; 288 | 289 | void __attribute__((noreturn)) async_task_worker() { 290 | flash_funcs = &default_flash_funcs; 291 | #ifndef NDEBUG 292 | _worker_started = true; 293 | #endif 294 | do { 295 | if (dequeue_task(&virtual_disk_queue, &_worker_task)) { 296 | execute_task(&virtual_disk_queue, &_worker_task); 297 | } 298 | #ifdef USE_PICOBOOT 299 | else if (dequeue_task(&picoboot_queue, &_worker_task)) { 300 | execute_task(&picoboot_queue, &_worker_task); 301 | } 302 | #endif 303 | else { 304 | __wfe(); 305 | } 306 | } while (true); 307 | } 308 | -------------------------------------------------------------------------------- /bootrom/async_task.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2020 Raspberry Pi (Trading) Ltd. 3 | * 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | */ 6 | 7 | #ifndef _ASYNC_TASK_H 8 | #define _ASYNC_TASK_H 9 | 10 | // Simple async task without real locking... 
tasks execute in thread mode, they are queued by IRQs and state management and completion callbacks called with IRQs disabled 11 | // which effectively means everything but the task execution is single threaded; as such the task should use the async_task structure for all input/output 12 | #include "runtime.h" 13 | 14 | #define ASYNC_TASK_REQUIRE_TASK_CALLBACK 15 | // bit field for task type 16 | #define AT_EXIT_XIP 0x01u 17 | #define AT_FLASH_ERASE 0x02u 18 | #define AT_READ 0x04u 19 | #define AT_WRITE 0x08u 20 | #define AT_EXCLUSIVE 0x10u 21 | #define AT_ENTER_CMD_XIP 0x20u 22 | #define AT_EXEC 0x40u 23 | #define AT_VECTORIZE_FLASH 0x80u 24 | 25 | struct async_task; 26 | 27 | typedef void (*async_task_callback)(struct async_task *task); 28 | 29 | #define FLASH_PAGE_SIZE 256u 30 | #define FLASH_PAGE_MASK (FLASH_PAGE_SIZE - 1u) 31 | #define FLASH_SECTOR_ERASE_SIZE 4096u 32 | 33 | enum task_source { 34 | TASK_SOURCE_VIRTUAL_DISK = 1, 35 | TASK_SOURCE_PICOBOOT, 36 | }; 37 | 38 | // copy by value task definition 39 | struct async_task { 40 | uint32_t token; 41 | uint32_t result; 42 | async_task_callback callback; 43 | 44 | // we only have one task type now, so inlining 45 | uint32_t transfer_addr; 46 | uint32_t erase_addr; 47 | uint32_t erase_size; 48 | uint8_t *data; 49 | uint32_t data_length; 50 | uint32_t picoboot_user_token; 51 | uint8_t type; 52 | uint8_t exclusive_param; 53 | // an identifier for the logical source of the task 54 | uint8_t source; 55 | // if true, fail the task if the source isn't the same as the last source that did a mutation 56 | bool check_last_mutation_source; 57 | }; 58 | 59 | // arguably a very short queue; there is only one "next" item which is set by queue_task...
60 | // attempts to queue multiple items will overwrite (so generally use multiple queues) 61 | // 62 | // this purely allows us to queue one task from the IRQ scope to the worker scope while 63 | // that worker scope may still be executing the last one 64 | struct async_task_queue { 65 | struct async_task task; 66 | volatile bool full; 67 | volatile bool disable; 68 | }; 69 | 70 | // called by irq handler to queue a task 71 | void queue_task(struct async_task_queue *queue, struct async_task *task, async_task_callback callback); 72 | 73 | // called from thread to dequeue a task 74 | bool dequeue_task(struct async_task_queue *queue, struct async_task *task_out); 75 | 76 | // runs forever dispatching tasks 77 | void __attribute__((noreturn)) async_task_worker(); 78 | 79 | void reset_task(struct async_task *task); 80 | 81 | extern struct async_task_queue virtual_disk_queue; 82 | extern struct async_task_queue picoboot_queue; 83 | 84 | static inline void async_disable_queue(struct async_task_queue *queue, bool disable) { 85 | queue->disable = disable; 86 | } 87 | 88 | static inline void reset_queue(struct async_task_queue *queue) { 89 | queue->full = false; 90 | async_disable_queue(queue, false); 91 | } 92 | 93 | // async task needs to know where the flash bitmap is so it can avoid it 94 | #ifndef USB_BOOT_EXPANDED_RUNTIME 95 | #define FLASH_VALID_BLOCKS_BASE XIP_SRAM_BASE 96 | #else 97 | #define FLASH_VALID_BLOCKS_BASE (SRAM_BASE + 96 * 1024) 98 | #endif 99 | #define FLASH_BITMAPS_SIZE (XIP_SRAM_END - XIP_SRAM_BASE) 100 | 101 | #endif // _ASYNC_TASK_H 102 | -------------------------------------------------------------------------------- /bootrom/bit_functions.S: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2020 Raspberry Pi (Trading) Ltd. 3 | * 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | */ 6 | 7 | // these are available and tested for all input values, though can certainly be omitted for space...
8 | 9 | // these numbers are for uniform input (average over uniform distribution of 32 bit input) cycle counts 10 | // NAME | TABLE | NON TABLE | BUILTIN | DESCRIPTION 11 | // -----------|-------|-----------|---------|------------- 12 | // CLZ32 | 13 | 9.6 | 20 | count of leading zeroes 13 | // CTZ32 | 12 | 11 | 20 | count of trailing zeroes 14 | // REVERSE32 | 21 | 22 | - | bit reverse 15 | // POPCOUNT32 | 18 | 20 | 22 | count of 1 bits 16 | // 17 | 18 | #define NEW_CLZ32 1 19 | #define NEW_CTZ32 1 20 | #define NEW_REVERSE32 1 21 | #define NEW_POPCOUNT32 1 22 | 23 | .cpu cortex-m0 24 | .thumb 25 | .section .text.bit_functions, "ax" 26 | 27 | #ifdef USE_POPCOUNT32 28 | .align 2 29 | .global popcount32 30 | .type popcount32,%function 31 | .thumb_func 32 | popcount32: 33 | #if NEW_POPCOUNT32 34 | ldr r1,=#0x49249249 35 | lsr r2,r0,#1 36 | and r2,r1 37 | lsr r3,r0,#2 38 | and r3,r1 39 | and r0,r1 40 | add r0,r2 41 | add r0,r3 @ xx0xx0xx0xx0xx0xx0xx0xx0xx0xx0xx 42 | lsr r1,r0,#3 43 | add r0,r1 @ xx***xxx***xxx***xxx***xxx***xxx 44 | ldr r1,=#0xC71C71C7 45 | and r0,r1 @ xx000xxx000xxx000xxx000xxx000xxx 46 | lsr r1,r0,#6 47 | add r0,r1 @ **00xxxx00****00xxxx00****00xxxx 48 | ldr r1,=#0x04004004 49 | mul r0,r1 50 | lsr r0,#26 51 | bx lr 52 | #else 53 | adr r1, popcount8_table 54 | uxtb r2, r0 55 | ldrb r3, [r1, r2] 56 | lsr r0, r0, #8 57 | uxtb r2, r0 58 | ldrb r2, [r1, r2] 59 | add r3, r2 60 | lsr r0, r0, #8 61 | uxtb r2, r0 62 | ldrb r2, [r1, r2] 63 | add r3, r2 64 | lsr r0, r0, #8 65 | ldrb r0, [r1, r0] 66 | add r0, r3 67 | bx lr 68 | #endif 69 | #endif 70 | 71 | #ifdef USE_REVERSE32 72 | .align 2 73 | .global reverse32 74 | .type reverse32,%function 75 | .thumb_func 76 | reverse32: 77 | #if NEW_REVERSE32 78 | ldr r2, =#0xcccccccc 79 | lsr r1, r2, #1 80 | eor r1, r2 81 | lsl r3, r0, #1 82 | and r3, r1 83 | and r0, r1 84 | lsr r0, #1 85 | orr r0, r3 86 | lsl r3, r0, #2 87 | and r3, r2 88 | and r0, r2 89 | lsr r0, r0, #2 90 | orr r3, r0 91 | ldr r2, =#0xf0f0f0f0 92 | 
lsl r0, r3, #4 93 | and r0, r2 94 | and r3, r2 95 | lsr r3, r3, #4 96 | orr r0, r3 97 | rev r0, r0 98 | bx lr 99 | #else 100 | adr r3, reverse8_table 101 | uxtb r1, r0 102 | lsr r0, #8 103 | ldrb r2, [r3, r1] 104 | uxtb r1, r0 105 | rev16 r2, r2 106 | ldrb r1, [r3, r1] 107 | orr r2, r1 108 | lsr r0, #8 109 | uxtb r1, r0 110 | ldrb r1, [r3, r1] 111 | rev r2, r2 112 | orr r2, r1 113 | lsr r1, r0, #8 114 | ldrb r0, [r3, r1] 115 | rev16 r2, r2 116 | orr r0, r2 117 | bx lr 118 | #endif 119 | #endif 120 | 121 | #ifdef USE_CLZ32 122 | .global clz32 123 | .type clz32,%function 124 | .thumb_func 125 | clz32: 126 | #if NEW_CLZ32 127 | adr r3, clz6_table 128 | lsr r1, r0, #16 129 | bne clz32_0_15_n 130 | //clz32_16_31_n: 131 | lsr r1, r0, #10 132 | bne clz32_16_21_l 133 | //clz32_22_31_n: 134 | lsr r1, r0, #4 135 | bne clz32_22_27_l 136 | //clz32_28_31_l: 137 | ldrb r0, [r3, r0] 138 | add r0, #28 - 2 // - 2 since we're using a 4 bit not 6 bit index 139 | bx lr 140 | clz32_16_21_l: 141 | ldrb r0, [r3, r1] 142 | add r0, #16 143 | bx lr 144 | clz32_22_27_l: 145 | ldrb r0, [r3, r1] 146 | add r0, #22 147 | bx lr 148 | clz32_0_15_n: 149 | lsr r0, r1, #10 150 | bne clz32_0_5_l 151 | //clz32_6_15_n: 152 | lsr r0, r1, #4 153 | bne clz32_6_11_l 154 | //clz32_12_15_l: 155 | ldrb r0, [r3, r1] 156 | add r0, #12 - 2 // - 2 since we're using a 4 bit not 6 bit index 157 | bx lr 158 | clz32_0_5_l: 159 | ldrb r0, [r3, r0] 160 | bx lr 161 | clz32_6_11_l: 162 | ldrb r0, [r3, r0] 163 | add r0, #6 164 | bx lr 165 | 166 | #else 167 | 168 | adr r3, clz8_table 169 | mov r2, #24 170 | lsr r1, r0, #16 171 | beq 1f 172 | sub r2, #16 173 | mov r0, r1 174 | 1: 175 | lsr r1, r0, #8 176 | beq 1f 177 | sub r2, #8 178 | mov r0, r1 179 | 1: 180 | ldrb r0, [r3, r0] 181 | add r0, r2 182 | bx lr 183 | #endif 184 | #endif 185 | 186 | #ifdef USE_CTZ32 187 | .global ctz32 188 | .type ctz32,%function 189 | .thumb_func 190 | ctz32: 191 | #if NEW_CTZ32 192 | adr r3, ctz6_table 193 | lsl r1, r0, #16 194 | beq 
ctz32_16_31_n 195 | ctz32_0_15_n: 196 | lsl r0, r1, #6 197 | beq ctz32_10_15_l 198 | //ctz32_0_9_n: 199 | lsl r1, r0, #6 200 | beq ctz32_4_9_l 201 | //ctz32_0_3_l: 202 | lsr r1, #28 203 | add r1, #16 204 | ldrb r0, [r3, r1] 205 | bx lr 206 | ctz32_10_15_l: 207 | lsr r1, #26 208 | ldrb r0, [r3, r1] 209 | add r0, #10 210 | bx lr 211 | ctz32_4_9_l: 212 | lsr r0, #26 213 | ldrb r0, [r3, r0] 214 | add r0, #4 215 | bx lr 216 | ctz32_16_31_n: 217 | lsl r1, r0, #6 218 | beq ctz32_26_31_l 219 | //ctz32_16_25_n: 220 | lsl r0, r1, #6 221 | beq ctz32_20_25_l 222 | //ctz32_16_19_l: 223 | lsr r0, #28 224 | add r0, #16 225 | ldrb r0, [r3, r0] 226 | add r0, #16 227 | bx lr 228 | ctz32_26_31_l: 229 | lsr r0, #26 230 | ldrb r0, [r3, r0] 231 | add r0, #26 232 | bx lr 233 | ctz32_20_25_l: 234 | lsr r1, #26 235 | ldrb r0, [r3, r1] 236 | add r0, #20 237 | bx lr 238 | #else 239 | adr r3, ctz8_table 240 | mov r2, #0 241 | lsl r1, r0, #16 242 | bne 1f 243 | add r2, #16 244 | lsr r0, r0, #16 245 | 1: 246 | lsl r1, r0, #24 247 | bne 1f 248 | add r2, #8 249 | rev16 r0, r0 250 | 1: 251 | uxtb r0, r0 252 | ldrb r0, [r3, r0] 253 | add r0, r2 254 | bx lr 255 | #endif 256 | #endif 257 | 258 | rt0_literals: 259 | .ltorg 260 | 261 | .align 2 262 | #ifdef USE_POPCOUNT32 263 | #if !NEW_POPCOUNT32 264 | .global popcount8_table 265 | popcount8_table: 266 | .byte 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05 267 | .byte 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06 268 | .byte 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06 269 | .byte 0x02, 0x03, 0x03, 0x04, 0x03, 0x04,
0x04, 0x05, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07 270 | .byte 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06 271 | .byte 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07 272 | .byte 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07 273 | .byte 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x05, 0x06, 0x06, 0x07, 0x06, 0x07, 0x07, 0x08 274 | #endif 275 | #endif 276 | 277 | #ifdef USE_REVERSE32 278 | #if !NEW_REVERSE32 279 | .global reverse8_table 280 | reverse8_table: 281 | .byte 0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0, 0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8, 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8 282 | .byte 0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4, 0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4, 0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec, 0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc 283 | .byte 0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2, 0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2, 0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea, 0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa 284 | .byte 0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6, 0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6, 0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee, 0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe 285 | .byte 0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1, 0x11, 0x91, 0x51, 0xd1, 
0x31, 0xb1, 0x71, 0xf1, 0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9, 0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9 286 | .byte 0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5, 0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5, 0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed, 0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd 287 | .byte 0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3, 0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3, 0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb, 0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb 288 | .byte 0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7, 0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7, 0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef, 0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff 289 | #endif 290 | #endif 291 | 292 | #ifdef USE_CLZ32 293 | #if NEW_CLZ32 294 | .global clz6_table 295 | clz6_table: 296 | .byte 0x08-2, 0x07-2, 0x06-2, 0x06-2, 0x05-2, 0x05-2, 0x05-2, 0x05-2, 0x04-2, 0x04-2, 0x04-2, 0x04-2, 0x04-2, 0x04-2, 0x04-2, 0x04-2, 0x03-2, 0x03-2, 0x03-2, 0x03-2, 0x03-2, 0x03-2, 0x03-2, 0x03-2, 0x03-2, 0x03-2, 0x03-2, 0x03-2, 0x03-2, 0x03-2, 0x03-2, 0x03-2 297 | .byte 0x02-2, 0x02-2, 0x02-2, 0x02-2, 0x02-2, 0x02-2, 0x02-2, 0x02-2, 0x02-2, 0x02-2, 0x02-2, 0x02-2, 0x02-2, 0x02-2, 0x02-2, 0x02-2, 0x02-2, 0x02-2, 0x02-2, 0x02-2, 0x02-2, 0x02-2, 0x02-2, 0x02-2, 0x02-2, 0x02-2, 0x02-2, 0x02-2, 0x02-2, 0x02-2, 0x02-2, 0x02-2 298 | #else 299 | .global clz8_table 300 | clz8_table: 301 | .byte 0x08, 0x07, 0x06, 0x06, 0x05, 0x05, 0x05, 0x05, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03 302 | .byte 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02 303 | .byte 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01 304 | .byte 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01 305 | .byte 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 306 | .byte 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 307 | .byte 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 308 | .byte 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 309 | #endif 310 | #endif 311 | 312 | #ifdef USE_CTZ32 313 | .global ctz6_table 314 | #if NEW_CTZ32 315 | ctz6_table: 316 | .byte 0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00 317 | .byte 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00 318 | #else 319 | .global ctz8_table 320 | ctz8_table: 321 | .byte 0x08, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00 322 | .byte 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 
0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00 323 | .byte 0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00 324 | .byte 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00 325 | .byte 0x07, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00 326 | .byte 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00 327 | .byte 0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00 328 | .byte 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00 329 | #endif 330 | #endif 331 | -------------------------------------------------------------------------------- /bootrom/bootrom.ld: -------------------------------------------------------------------------------- 1 | MEMORY { 2 | ROM(rx) : ORIGIN = 0x00000000, LENGTH = 16K 3 | SRAM(rwx) : ORIGIN = 0x20000000, LENGTH = 264K 4 | USBRAM(rw) : ORIGIN = 0x50100400, LENGTH = 3K 5 | } 6 | 7 | SECTIONS { 8 | . = ORIGIN(ROM); 9 | .text : { 10 | KEEP(*(.vectors)) 11 | *(.text*) 12 | KEEP(*(.rodata.keep*)) 13 | *(.rodata*) 14 | this_is_the_end_my_only_friend_the_end = .; 15 | . 
= ALIGN(LENGTH(ROM)); 16 | } >ROM =0x00be 17 | 18 | .data : { 19 | *(.data*) 20 | } >USBRAM 21 | 22 | .bss : { 23 | *(.bss*) 24 | } >USBRAM 25 | 26 | ASSERT(__irq5_vector == __vectors + 0x40 + 5 * 4, "too much data in middle of vector table") 27 | ASSERT(SIZEOF(.data) == 0, 28 | "ERROR: do not use static memory in bootrom! (.data)") 29 | 30 | /* Leave room above the stack for stage 2 load, so that stage 2 31 | can image SRAM from its beginning */ 32 | _stacktop = ORIGIN(SRAM) + LENGTH(SRAM) - 256; 33 | } 34 | -------------------------------------------------------------------------------- /bootrom/bootrom_crc32.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2020 Raspberry Pi (Trading) Ltd. 3 | * 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | */ 6 | 7 | #ifndef _BOOTROM_CRC32_H 8 | #define _BOOTROM_CRC32_H 9 | 10 | #include "pico/types.h" 11 | 12 | uint32_t crc32_small(const uint8_t *buf, unsigned int len, uint32_t seed); 13 | 14 | #endif 15 | -------------------------------------------------------------------------------- /bootrom/bootrom_main.c: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2020 Raspberry Pi (Trading) Ltd. 
3 | * 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | */ 6 | 7 | #include "program_flash_generic.h" 8 | #include "hardware/structs/clocks.h" 9 | #include "hardware/structs/pll.h" 10 | #include "hardware/structs/rosc.h" 11 | #include "hardware/structs/sio.h" 12 | #include "hardware/structs/ssi.h" 13 | #include "hardware/structs/watchdog.h" 14 | #include "hardware/structs/xip_ctrl.h" 15 | #include "hardware/structs/xosc.h" 16 | #include "hardware/sync.h" 17 | #include "hardware/resets.h" 18 | #include "usb_boot_device.h" 19 | #include "resets.h" 20 | 21 | #include "async_task.h" 22 | #include "bootrom_crc32.h" 23 | #include "runtime.h" 24 | #include "hardware/structs/usb.h" 25 | 26 | // From SDF + STA, plus 20% margin each side 27 | // CLK_SYS FREQ ON STARTUP (in MHz) 28 | // +----------------------- 29 | // | min | 1.8 | 30 | // | typ | 6.5 | 31 | // | max | 11.3 | 32 | // +----------------------+ 33 | #define ROSC_MHZ_MAX 12 34 | 35 | // Each attempt takes around 4 ms total with a 6.5 MHz boot clock 36 | #define FLASH_MAX_ATTEMPTS 128 37 | 38 | #define BOOT2_SIZE_BYTES 256 39 | #define BOOT2_FLASH_OFFS 0 40 | #define BOOT2_MAGIC 0x12345678 41 | #define BOOT2_BASE (SRAM_END - BOOT2_SIZE_BYTES) 42 | 43 | 44 | static uint8_t *const boot2_load = (uint8_t *const) BOOT2_BASE; 45 | static ssi_hw_t *const ssi = (ssi_hw_t *) XIP_SSI_BASE; 46 | 47 | extern void debug_trampoline(); 48 | 49 | // Busy-wait: 3 cycles per count. count must be >= 1 (a count of 0 wraps and spins for 2^32 iterations) 50 | static inline void delay(uint32_t count) { 51 | asm volatile ( 52 | "1: \n\t" 53 | "sub %0, %0, #1 \n\t" 54 | "bne 1b" 55 | : "+l" (count) : : "cc" // "l": Thumb-1 sub-immediate needs a low register; "cc": the loop sets and tests the flags 56 | ); 57 | } 58 | 59 | static void _flash_boot() { 60 | connect_internal_flash(); 61 | flash_exit_xip(); 62 | 63 | // Repeatedly poll flash read with all CPOL CPHA combinations until we 64 | // get a valid 2nd stage bootloader (checksum pass) 65 | int attempt; 66 | for (attempt = 0; attempt < FLASH_MAX_ATTEMPTS; ++attempt) { 67 | unsigned int cpol_cpha = attempt & 0x3u; 68 | ssi->ssienr = 0; 69 | ssi->ctrlr0 = (ssi->ctrlr0 
70 | & ~(SSI_CTRLR0_SCPH_BITS | SSI_CTRLR0_SCPOL_BITS)) 71 | | (cpol_cpha << SSI_CTRLR0_SCPH_LSB); 72 | ssi->ssienr = 1; 73 | 74 | flash_read_data(BOOT2_FLASH_OFFS, boot2_load, BOOT2_SIZE_BYTES); 75 | uint32_t sum = crc32_small(boot2_load, BOOT2_SIZE_BYTES - 4, 0xffffffff); 76 | if (sum == *(uint32_t *) (boot2_load + BOOT2_SIZE_BYTES - 4)) 77 | break; 78 | } 79 | 80 | if (attempt == FLASH_MAX_ATTEMPTS) 81 | return; 82 | 83 | // Take this opportunity to flush the flash cache, as the debugger may have 84 | // written fresh code in behind it. 85 | flash_flush_cache(); 86 | 87 | // Enter boot2 (thumb bit set). Exit pointer is passed in lr -- we pass 88 | // null, boot2 provides default for this case. 89 | // Addition performed inside asm because GCC *really* wants to store another constant 90 | uint32_t boot2_entry = (uintptr_t) boot2_load; 91 | const uint32_t boot2_exit = 0; 92 | asm volatile ( 93 | "add %0, #1\n" 94 | "mov lr, %1\n" 95 | "bx %0\n" 96 | : "+r" (boot2_entry) : "l" (boot2_exit) : 97 | ); 98 | __builtin_unreachable(); 99 | } 100 | 101 | // USB bootloader requires clk_sys and clk_usb at 48 MHz. For this to work, 102 | // xosc must be running at 12 MHz. It is possible that: 103 | // 104 | // - No crystal is present (and XI may not be properly grounded) 105 | // - xosc output is much greater than 12 MHz 106 | // 107 | // In this case we *must* leave clk_sys in a safe state, and ideally, never 108 | // return from this function. This is because boards which are not designed to 109 | // use USB will still enter the USB bootcode when booted with a blank flash. 110 | 111 | static void _usb_clock_setup() { 112 | // First make absolutely sure clk_ref is running: needed for resuscitate, 113 | // and to run clk_sys while configuring sys PLL. Assume that rosc is not 114 | // configured to run faster than clk_sys max (as this is officially out of 115 | // spec) 116 | // If user previously configured clk_ref to a different source (e.g. 
117 | // GPINx), then halted that source, the glitchless mux can't switch away 118 | // from the dead source-- nothing we can do about this here. 119 | rosc_hw->ctrl = ROSC_CTRL_ENABLE_VALUE_ENABLE << ROSC_CTRL_ENABLE_LSB; 120 | hw_clear_bits(&clocks_hw->clk[clk_ref].ctrl, CLOCKS_CLK_REF_CTRL_SRC_BITS); 121 | 122 | // Resuscitate logic will switch clk_sys to clk_ref if it is inadvertently stopped 123 | clocks_hw->resus.ctrl = 124 | CLOCKS_CLK_SYS_RESUS_CTRL_ENABLE_BITS | 125 | (CLOCKS_CLK_SYS_RESUS_CTRL_TIMEOUT_RESET 126 | << CLOCKS_CLK_SYS_RESUS_CTRL_TIMEOUT_LSB); 127 | 128 | // Resetting PLL regs or changing XOSC range can glitch output, so switch 129 | // clk_sys away before touching. Not worried about clk_usb as USB is held 130 | // in reset. 131 | hw_clear_bits(&clocks_hw->clk[clk_sys].ctrl, CLOCKS_CLK_SYS_CTRL_SRC_BITS); 132 | while (!(clocks_hw->clk[clk_sys].selected & 1u)); 133 | // rosc can not (while following spec) run faster than clk_sys max, so 134 | // it's safe now to clear dividers in clkslices. 135 | clocks_hw->clk[clk_sys].div = 0x100; // int 1 frac 0 136 | clocks_hw->clk[clk_usb].div = 0x100; 137 | 138 | // Try to get the crystal running. If no crystal is present, XI should be 139 | // grounded, so STABLE counter will never complete. Poor designs might 140 | // leave XI floating, in which case we may eventually drop through... in 141 | // this case we rely on PLL not locking, and/or resuscitate counter. 142 | // 143 | // Don't touch range setting: user would only have changed if crystal 144 | // needs it, and running crystal out of range can produce glitchy output. 145 | // Note writing a "bad" value (non-aax) to RANGE has no effect. 146 | xosc_hw->ctrl = XOSC_CTRL_ENABLE_VALUE_ENABLE << XOSC_CTRL_ENABLE_LSB; 147 | while (!(xosc_hw->status & XOSC_STATUS_STABLE_BITS)); 148 | 149 | // Sys PLL setup: 150 | // - VCO freq 1200 MHz, so feedback divisor of 100. 
Range is 400 MHz to 1.6 GHz 151 | // - Postdiv1 of 5, down to 240 MHz (appnote recommends postdiv1 >= postdiv2) 152 | // - Postdiv2 of 5, down to 48 MHz 153 | // 154 | // Total postdiv of 25 means that too-fast xtal will push VCO out of 155 | // lockable range *before* clk_sys goes out of closure (factor of 1.88) 156 | reset_unreset_block_wait_noinline(RESETS_RESET_PLL_SYS_BITS); 157 | pll_sys_hw->cs = 1u << PLL_CS_REFDIV_LSB; 158 | pll_sys_hw->fbdiv_int = 100; 159 | pll_sys_hw->prim = 160 | (5u << PLL_PRIM_POSTDIV1_LSB) | 161 | (5u << PLL_PRIM_POSTDIV2_LSB); 162 | 163 | // Power up VCO, wait for lock 164 | hw_clear_bits(&pll_sys_hw->pwr, PLL_PWR_PD_BITS | PLL_PWR_VCOPD_BITS); 165 | while (!(pll_sys_hw->cs & PLL_CS_LOCK_BITS)); 166 | 167 | // Power up post-dividers, which ungates PLL final output 168 | hw_clear_bits(&pll_sys_hw->pwr, PLL_PWR_POSTDIVPD_BITS); 169 | 170 | // Glitchy switch of clk_usb, clk_sys aux to sys PLL output. 171 | clocks_hw->clk[clk_sys].ctrl = 0; 172 | clocks_hw->clk[clk_usb].ctrl = 173 | CLOCKS_CLK_USB_CTRL_ENABLE_BITS | 174 | (CLOCKS_CLK_USB_CTRL_AUXSRC_VALUE_CLKSRC_PLL_SYS 175 | << CLOCKS_CLK_USB_CTRL_AUXSRC_LSB); 176 | 177 | // Glitchless switch of clk_sys to aux source (sys PLL) 178 | hw_set_bits(&clocks_hw->clk[clk_sys].ctrl, CLOCKS_CLK_SYS_CTRL_SRC_BITS); 179 | while (!(clocks_hw->clk[clk_sys].selected & 0x2u)); 180 | } 181 | 182 | void __noinline __attribute__((noreturn)) async_task_worker_thunk(); 183 | 184 | static __noinline __attribute__((noreturn)) void _usb_boot(uint32_t _usb_activity_gpio_pin_mask, 185 | uint32_t disable_interface_mask) { 186 | reset_block_noinline(RESETS_RESET_USBCTRL_BITS); 187 | if (!running_on_fpga()) 188 | _usb_clock_setup(); 189 | unreset_block_wait_noinline(RESETS_RESET_USBCTRL_BITS); 190 | 191 | // Ensure timer and watchdog are running at approximately correct speed 192 | // (can't switch clk_ref to xosc at this time, as we might lose ability to resus) 193 | watchdog_hw->tick = 12u << 
WATCHDOG_TICK_CYCLES_LSB; 194 | hw_set_bits(&watchdog_hw->tick, WATCHDOG_TICK_ENABLE_BITS); 195 | 196 | // turn off XIP cache since we want to use it as RAM in case the USER wants to use it for a RAM only binary 197 | hw_clear_bits(&xip_ctrl_hw->ctrl, XIP_CTRL_EN_BITS); 198 | // Don't clear out RAM - leave it to binary download to clear anything it needs cleared; anything BSS will be done by crt0.S on reset anyway 199 | 200 | // this is where the BSS is so clear it 201 | memset0(usb_dpram, USB_DPRAM_SIZE); 202 | 203 | // now we can finally initialize these 204 | #ifdef USE_BOOTROM_GPIO 205 | usb_activity_gpio_pin_mask = _usb_activity_gpio_pin_mask; 206 | #endif 207 | 208 | usb_boot_device_init(disable_interface_mask); 209 | 210 | // worker to run tasks on this thread (never returns); Note: USB code is IRQ driven 211 | // this thunk switches stack into USB DPRAM then calls async_task_worker 212 | async_task_worker_thunk(); 213 | } 214 | 215 | static void __attribute__((noreturn)) _usb_boot_reboot_wrapper() { 216 | _usb_boot(watchdog_hw->scratch[0], watchdog_hw->scratch[1]); 217 | } 218 | 219 | void __attribute__((noreturn)) reset_usb_boot(uint32_t _usb_activity_gpio_pin_mask, uint32_t _disable_interface_mask) { 220 | watchdog_hw->scratch[0] = _usb_activity_gpio_pin_mask; 221 | watchdog_hw->scratch[1] = _disable_interface_mask; 222 | watchdog_reboot((uintptr_t) _usb_boot_reboot_wrapper, SRAM_END, 10); 223 | while (true) __wfi(); 224 | } 225 | 226 | int main() { 227 | const uint32_t rst_mask = 228 | RESETS_RESET_IO_QSPI_BITS | 229 | RESETS_RESET_PADS_QSPI_BITS | 230 | RESETS_RESET_TIMER_BITS; 231 | reset_unreset_block_wait_noinline(rst_mask); 232 | 233 | // Workaround for behaviour of TXB0108 bidirectional level shifters on 234 | // FPGA platform (JIRA PRJMU-726), not used on ASIC 235 | if (running_on_fpga()) { 236 | *(io_rw_32 *) (IO_QSPI_BASE + 0xc) = 5; // GPIO_FUNC_PROC 237 | sio_hw->gpio_hi_out = 1u << 1; // Level high on CS pin 238 | sio_hw->gpio_hi_oe = 1u << 
1; // Output enable 239 | sio_hw->gpio_hi_oe = 0; // Output disable 240 | } 241 | 242 | // Check CSn strap: delay for pullups to charge trace, then take a majority vote. 243 | delay(100 * ROSC_MHZ_MAX / 3); 244 | uint32_t sum = 0; 245 | for (int i = 0; i < 9; ++i) { 246 | delay(1 * ROSC_MHZ_MAX / 3); 247 | sum += (sio_hw->gpio_hi_in >> 1) & 1u; 248 | } 249 | 250 | if (sum >= 5) 251 | _flash_boot(); 252 | 253 | // note this never returns (and is marked as such) 254 | _usb_boot(0, 0); 255 | } 256 | -------------------------------------------------------------------------------- /bootrom/bootrom_misc.S: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2020 Raspberry Pi (Trading) Ltd. 3 | * 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | */ 6 | 7 | .cpu cortex-m0 8 | .thumb 9 | 10 | .section .text.crc, "ax" 11 | 12 | // Each byte digested MSB-first 13 | // r0: start pointer 14 | // r1: length in bytes 15 | // r2: checksum seed value 16 | 17 | .global crc32_small 18 | .type crc32_small,%function 19 | .thumb_func 20 | crc32_small: 21 | push {r4, r5, lr} 22 | // r1 now end 23 | add r1, r0 24 | ldr r5, =#0x4c11db7 25 | 26 | b byte_loop_test 27 | byte_loop: 28 | ldrb r4, [r0] 29 | 30 | rev r3, r2 31 | eor r4, r3 32 | lsl r4, #24 33 | 34 | mov r3, #8 35 | bit_loop: 36 | lsl r4, #1 37 | bcc no_bit 38 | eor r4, r5 39 | no_bit: 40 | sub r3, #1 41 | bne bit_loop 42 | 43 | lsl r2, #8 44 | eor r2, r4 45 | 46 | add r0, #1 47 | byte_loop_test: 48 | cmp r0, r1 49 | blt byte_loop 50 | 51 | mov r0, r2 52 | pop {r4, r5, pc} 53 | 54 | #ifdef COMPRESS_TEXT 55 | .global poor_mans_text_decompress 56 | .type poor_mans_text_decompress,%function 57 | .thumb_func 58 | poor_mans_text_decompress: 59 | push {r4, r5, lr} 60 | 61 | neg r1, r1 62 | 1: 63 | // r3 = char 64 | ldrb r3, [r0, r1] 65 | add r2, #1 66 | lsr r4, r3, #8 67 | sbc r5, r5 68 | // r5 is -1 for regular character 69 | bne 3f 70 | 71 | sxtb r5, r3 72 | add r1, #1 73 | ldrb 
r4, [r0, r1] 74 | // note we expect clear carry (so this subtracts an extra 1), which is fine, because r1 is < 0 still 75 | sbc r2, r5 76 | sub r4, r2, r4 77 | // do copy 78 | 2: 79 | ldrb r3, [r4, r5] 80 | 3: 81 | strb r3, [r2, r5] 82 | add r5, #1 83 | bne 2b 84 | 85 | add r1, #1 86 | bne 1b 87 | 88 | pop {r4, r5, pc} 89 | #else 90 | #WHY NOT? 91 | #endif 92 | 93 | // memcpy function assuming r0 (dst) and r1 (src) are both word aligned; r2 = length in bytes; returns the original dst in r0 94 | .global __memcpy_44 95 | .type __memcpy_44,%function 96 | .thumb_func 97 | __memcpy_44: 98 | mov ip, r0 99 | cmp r2, #8 100 | blo _memcpy_short 101 | push {r4-r6} 102 | b _memcpy_aligned 103 | 104 | __memcpy_slow_lp: // byte-at-a-time copy from the end backwards; reached from __memcpy when dst and src are not co-aligned (r2 >= 8 here) 105 | sub r2, #1 106 | ldrb r3, [r1, r2] 107 | strb r3, [r0, r2] 108 | bne __memcpy_slow_lp 109 | mov r0, ip 110 | bx lr 111 | 112 | .align 2 113 | .global __memcpy 114 | .type __memcpy,%function 115 | .thumb_func 116 | __memcpy: 117 | mov ip, r0 118 | cmp r2, #8 119 | blo _memcpy_short 120 | sub r3, r0, r1 121 | lsl r3, #30 122 | bne __memcpy_slow_lp 123 | 124 | // r0 and r1 are co-aligned 125 | push {r4-r6} 126 | sub r1, r0 127 | mov r5, r0 128 | lsr r3, r0, #1 129 | bcc 1f 130 | 131 | // byte at odd address 132 | ldrb r4, [r0, r1] 133 | strb r4, [r0] 134 | add r0, #1 135 | 1: 136 | lsr r3, r0, #2 137 | bcc 1f 138 | 139 | // halfword on non word boundary 140 | ldrh r4, [r0, r1] 141 | strh r4, [r0] 142 | add r0, #2 143 | 1: 144 | // adjust length 145 | add r1, r0 146 | sub r5, r0 147 | add r2, r5 148 | 149 | _memcpy_aligned: 150 | sub r2, #16 151 | bcc 5f 152 | .align 2 153 | // 16 byte loop 154 | 1: 155 | ldmia r1!, {r3, r4, r5, r6} 156 | stmia r0!, {r3, r4, r5, r6} 157 | sub r2, #16 158 | bcs 1b 159 | 5: 160 | // 8 bytes remainder? 161 | lsl r2, #29 162 | bcc 1f 163 | ldmia r1!, {r3, r4} 164 | stmia r0!, {r3, r4} 165 | 1: 166 | // 4 bytes remainder? 167 | lsl r2, #1 168 | bcc 1f 169 | ldmia r1!, {r3} 170 | stmia r0!, {r3} 171 | 1: 172 | // early out for word aligned ending 173 | beq 2f 174 | // 2 bytes remainder? 
175 | lsl r2, #1 176 | bcc 1f 177 | ldrh r3, [r1] 178 | strh r3, [r0] 179 | beq 2f 180 | add r1, #2 181 | add r0, #2 182 | // note fall thru into branch that wont take 183 | 1: 184 | // 1 bytes remainder? 185 | beq 2f 186 | ldrb r3, [r1] 187 | strb r3, [r0] 188 | 189 | 2: 190 | pop {r4-r6} 191 | mov r0, ip 192 | bx lr 193 | 194 | _memcpy_short: 195 | adr r3, _memcpy_short_end 196 | lsl r2, #2 197 | sub r3, r2 198 | add r3, #1 199 | bx r3 200 | 201 | .align 2 202 | ldrb r3, [r1, #6] 203 | strb r3, [r0, #6] 204 | ldrb r3, [r1, #5] 205 | strb r3, [r0, #5] 206 | ldrb r3, [r1, #4] 207 | strb r3, [r0, #4] 208 | ldrb r3, [r1, #3] 209 | strb r3, [r0, #3] 210 | ldrb r3, [r1, #2] 211 | strb r3, [r0, #2] 212 | ldrb r3, [r1, #1] 213 | strb r3, [r0, #1] 214 | ldrb r3, [r1, #0] 215 | strb r3, [r0, #0] 216 | _memcpy_short_end: 217 | mov r0, ip 218 | bx lr 219 | 220 | // memset function assuming r0 is word aligned 221 | .global __memset_4 222 | .type __memset_4,%function 223 | .thumb_func 224 | __memset_4: 225 | mov ip, r0 226 | uxtb r1, r1 227 | lsl r3, r1, #8 228 | orr r1, r3 229 | b _memset_word_boundary_with_hword_r1 230 | 231 | .align 2 232 | .global __memset 233 | .type __memset,%function 234 | .thumb_func 235 | __memset: 236 | mov ip, r0 237 | cmp r2, #8 238 | blo _memset_short 239 | 240 | lsr r3, r0, #1 241 | bcc 1f 242 | // byte at odd address 243 | strb r1, [r0] 244 | add r0, #1 245 | 1: 246 | 247 | // do byte->hword early 248 | uxtb r1, r1 249 | lsl r3, r1, #8 250 | orr r1, r3 251 | lsr r3, r0, #2 252 | bcc 1f 253 | 254 | // halfword on non word boundary 255 | strh r1, [r0] 256 | add r0, #2 257 | 1: 258 | 259 | // adjust length 260 | mov r3, ip 261 | sub r3, r0 262 | add r2, r3 263 | 264 | _memset_word_boundary_with_hword_r1: 265 | // at this point we already have r1 == 0000XXXX so extends to r1 = XXXXXXXX r3 = XXXXXXXX 266 | rev r3, r1 267 | orr r1, r3 268 | mov r3, r1 269 | 270 | sub r2, #16 271 | bcc 5f 272 | 273 | // extend to r4 = XXXXXXXX r5 = XXXXXXXX 274 | push 
{r4-r5} 275 | mov r4, r1 276 | mov r5, r1 277 | .align 2 278 | // 16 byte loop 279 | 1: 280 | stmia r0!, {r1, r3, r4, r5} 281 | sub r2, #16 282 | bcs 1b 283 | pop {r4-r5} 284 | 5: 285 | // 8 bytes remainder? 286 | lsl r2, #29 287 | bcc 1f 288 | stmia r0!, {r1, r3} 289 | 1: 290 | // 4 bytes remainder? 291 | lsl r2, #1 292 | bcc 1f 293 | stmia r0!, {r1} 294 | 1: 295 | // early out for word aligned ending 296 | beq 2f 297 | // 2 bytes remainder? 298 | lsl r2, #1 299 | bcc 1f 300 | strh r1, [r0] 301 | beq 2f 302 | add r0, #2 303 | // note fall thru into branch that wont take 304 | 1: 305 | // 1 byte remainder? 306 | beq 2f 307 | strb r1, [r0] 308 | 2: 309 | mov r0, ip 310 | bx lr 311 | 312 | _memset_short: 313 | adr r3, _memset_short_end 314 | sub r3, r2 315 | sub r3, r2 316 | add r3, #1 317 | bx r3 318 | 319 | strb r1, [r0, #6] 320 | strb r1, [r0, #5] 321 | strb r1, [r0, #4] 322 | strb r1, [r0, #3] 323 | strb r1, [r0, #2] 324 | strb r1, [r0, #1] 325 | strb r1, [r0, #0] 326 | _memset_short_end: 327 | mov r0, ip 328 | bx lr 329 | -------------------------------------------------------------------------------- /bootrom/bootrom_rt0.S: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2020 Raspberry Pi (Trading) Ltd. 3 | * 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | */ 6 | 7 | 8 | // ---------------------------------------------------------------------------- 9 | // Bootrom Runtime 0 10 | // ---------------------------------------------------------------------------- 11 | // This is not a full crt0 -- in particular, no .bss or .data initialisation 12 | // (use of .data/.bss is disallowed via linker script assertions). 13 | // The bootrom is not permitted to use statically-allocated memory, as parts of 14 | // it are called into by user code. 
15 | // The purpose of this file is: 16 | // - Provide initial entry point for both cores 17 | // - Provide holding pen and launch code for core 1 18 | // - Provide direct-boot entry for core 0, mainly intended for running 19 | // ARM code during ATE 20 | // - Pass core 0 control over to the main flash boot sequence 21 | 22 | #include "hardware/regs/addressmap.h" 23 | #include "hardware/regs/pads_bank0.h" 24 | #include "hardware/regs/resets.h" 25 | #include "hardware/regs/sio.h" 26 | #include "hardware/regs/watchdog.h" 27 | #include "hardware/regs/syscfg.h" 28 | #include "hardware/regs/clocks.h" 29 | #include "hardware/regs/vreg_and_chip_reset.h" 30 | #include "hardware/regs/m0plus.h" 31 | #include "git_info.h" 32 | 33 | .cpu cortex-m0 34 | .thumb 35 | .section .vectors 36 | .balign 2 37 | 38 | .global __vectors 39 | __vectors: 40 | .word _stacktop // MSP 41 | .word _start // Reset 42 | .word _nmi // NMI 43 | .word _dead // HardFault 44 | 45 | .global _magic 46 | _magic: 47 | # magic 48 | .byte 'M', 'u' 49 | # compatibility version (change if function table is incompatible, or functions are backwards incompatible) 50 | .byte 1 51 | # ROM version 52 | .byte 3 53 | 54 | .global _well_known 55 | _well_known: 56 | .hword function_table 57 | .hword data_table 58 | .hword table_lookup + 1 59 | .hword 0 // pad 60 | 61 | .global table_lookup 62 | .type table_lookup,%function 63 | .thumb_func 64 | table_lookup: 65 | mov r3, #0 66 | ldrh r2, [r0] 67 | cmp r2, r3 68 | beq 1f 69 | ldrh r3, [r0, #2] 70 | add r0, #4 71 | cmp r1, r2 72 | bne table_lookup 73 | 1: 74 | mov r0, r3 75 | bx lr 76 | 77 | .global _dead 78 | .type _dead,%function 79 | .thumb_func 80 | _dead: // in place of irq4 vector 81 | wfi 82 | b _dead 83 | 84 | // this is all a bit cheeky, but the existing code assumed that the table above could go there because nothing ever called 85 | // some code in irq0 -4 slots 86 | 87 | // If you reach this, something has gone wrong. 
Most likely, the debugger 88 | // has done a core-only reset while the NMI mask was set, and a relevant 89 | // IRQ was asserted. This is probably not intentional, and is confusing 90 | // to debug, so we should just clear the NMI masks. 91 | .align 2 92 | .global _nmi 93 | .type _nmi,%function 94 | .thumb_func 95 | _nmi: 96 | // we do not want to use any stack as we're called from the core 0 boot path. 97 | // we use r12 as this is saved/restored by the processor in an actual NMI 98 | mov r12, lr 99 | // We can take an NMI straight out of reset, so we should first ensure 100 | // that SYSCFG is being clocked, else we end up straight back in NMI 101 | bl enable_clocks 102 | // Then just clear the NMI mask (for both cores) 103 | ldr r0, =(SYSCFG_BASE + SYSCFG_PROC0_NMI_MASK_OFFSET) 104 | mov r1, #0 105 | str r1, [r0] 106 | str r1, [r0, #4] 107 | mov pc, r12 108 | 109 | // On a cold boot, the clocks will already be enabled, because the power-on state 110 | // machine will have reset the clock controls. However we can have trouble on a warm 111 | // boot, that is to say: 112 | // - The debugger has just reset the processors and started them running 113 | // - The watchdog has fired, with WDSEL selecting a restart point after 114 | // clocks_bank_default. 115 | // Assume that enough clocks are already enabled to run this code! 116 | // Note it is NOT recommended to disable things like ROM clock if WDSEL is 117 | // later than CLOCKS_BANK_DEFAULT. 118 | .global enable_clocks 119 | .type enable_clocks,%function 120 | .thumb_func 121 | enable_clocks: 122 | ldr r0, =(CLOCKS_BASE + CLOCKS_WAKE_EN0_OFFSET) 123 | // Set entire clock enable mask. 
Relying on HCGs to avoid huge current transient 124 | mov r1, #0 125 | mvn r1, r1 126 | str r1, [r0] 127 | str r1, [r0, #4] 128 | // we steal the return for its own function 129 | .global _noop 130 | .type _noop,%function 131 | .thumb_func 132 | _noop: 133 | bx lr 134 | 135 | .align 2 136 | .global software_git_revision 137 | software_git_revision: 138 | .word GIT_REV 139 | 140 | .global __irq5_vector 141 | __irq5_vector: 142 | .word isr_irq5 143 | 144 | copyright: 145 | .string "(C) 2020 Raspberry Pi Trading Ltd" 146 | 147 | .balign 2 // the tables below are read with ldrh: keep them halfword aligned whatever the length of the string above 148 | function_table: 149 | # function table: pairs of (two-character code, halfword address with Thumb bit set), terminated by a zero halfword 150 | #ifdef USE_POPCOUNT32 151 | .byte 'P', '3' 152 | .hword popcount32 + 1 153 | #endif 154 | #ifdef USE_REVERSE32 155 | .byte 'R', '3' 156 | .hword reverse32 + 1 157 | #endif 158 | #ifdef USE_CLZ32 159 | .byte 'L', '3' 160 | .hword clz32 + 1 161 | #endif 162 | #ifdef USE_CTZ32 163 | .byte 'T', '3' 164 | .hword ctz32 + 1 165 | #endif 166 | .byte 'M', 'S' 167 | .hword __memset + 1 168 | .byte 'S', '4' 169 | .hword __memset_4 + 1 170 | .byte 'M', 'C' 171 | .hword __memcpy + 1 172 | .byte 'C', '4' 173 | .hword __memcpy_44 + 1 174 | .byte 'U', 'B' 175 | .hword reset_usb_boot + 1 176 | .byte 'D', 'T' 177 | .hword debug_trampoline + 1 178 | .byte 'D', 'E' 179 | .hword debug_trampoline_end + 1 180 | .byte 'W', 'V' 181 | .hword wait_for_vector + 1 182 | .byte 'I', 'F' 183 | .hword connect_internal_flash + 1 184 | .byte 'E', 'X' 185 | .hword flash_exit_xip + 1 186 | .byte 'R', 'E' 187 | .hword flash_range_erase + 1 188 | .byte 'R', 'P' 189 | .hword flash_range_program + 1 190 | .byte 'F', 'C' 191 | .hword flash_flush_cache + 1 192 | .byte 'C', 'X' 193 | .hword flash_enter_cmd_xip + 1 194 | # end of function table marker 195 | .hword 0 196 | 197 | .global data_table 198 | data_table: 199 | .byte 'G', 'R' 200 | .hword software_git_revision 201 | .byte 'C', 'R' 202 | .hword copyright 203 | .byte 'S', 'F' 204 | .hword soft_float_table 205 | .byte 'S', 'D' 206 | .hword soft_double_table 207 | .byte 'F', 'Z' 208 
| .hword soft_float_table_size 209 | // expose float library start and end to facilitate users copying into RAM 210 | .byte 'F', 'S' 211 | .hword mufp_lib_start 212 | .byte 'F', 'E' 213 | .hword mufp_lib_end 214 | // expose double library start and end to facilitate users copying into RAM 215 | .byte 'D', 'S' 216 | .hword mufp_lib_double_start 217 | .byte 'D', 'E' 218 | .hword mufp_lib_double_end 219 | .hword 0 // end of data table marker 220 | 221 | // ---------------------------------------------------------------------------- 222 | // Entry point for both cores 223 | // ---------------------------------------------------------------------------- 224 | 225 | .global _start 226 | .type _start,%function 227 | .thumb_func 228 | _start: 229 | 230 | // Check if this is core 0, and go to holding pen if not 231 | check_core: 232 | // NOTE: We DO NOT use any stack prior to possible watchdog entry (this includes NMI vector handler) 233 | ldr r0, =SIO_BASE 234 | ldr r1, [r0, #SIO_CPUID_OFFSET] 235 | cmp r1, #0 236 | bne wait_for_vector 237 | 238 | // Make sure all the control registers we are about to access are being clocked. 239 | // On a cold boot everything will be set up by the power-on state machine, 240 | // but the clock setup may be dirty on a warm boot. 241 | 242 | // note that the NMI handler does exactly what we want (enable_clocks) and also disables NMI 243 | bl _nmi 244 | 245 | // If the rescue flag is set in PoR block, we should halt immediately. 246 | // (presumably some lethal code is in flash which would stop the debugger from 247 | // communicating with the processors). 
248 | check_rescue: 249 | ldr r1, =(VREG_AND_CHIP_RESET_BASE + VREG_AND_CHIP_RESET_CHIP_RESET_OFFSET) 250 | ldr r2, [r1] 251 | #if VREG_AND_CHIP_RESET_CHIP_RESET_PSM_RESTART_FLAG_BITS != 0x01000000 252 | #error 253 | #endif 254 | lsr r3, r2, #25 255 | bcc 1f 256 | // Acknowledge and halt (note we write all bits, but VREG_AND_CHIP_RESET_CHIP_RESET_PSM_RESTART_FLAG_BITS 257 | // is the only WC bit 258 | str r2, [r1] 259 | b _dead 260 | 1: 261 | 262 | disable_adc_ie: 263 | #if RESETS_RESET_PADS_BANK0_BITS != (0x80 << 1) 264 | #error 265 | #endif 266 | mov r0, #0x80 267 | lsl r0, #1 268 | bl unreset_block_wait_noinline 269 | 270 | ldr r1, =PADS_BANK0_BASE + REG_ALIAS_CLR_BITS 271 | mov r2, #PADS_BANK0_GPIO0_IE_BITS 272 | str r2, [r1, #PADS_BANK0_GPIO26_OFFSET] 273 | str r2, [r1, #PADS_BANK0_GPIO27_OFFSET] 274 | str r2, [r1, #PADS_BANK0_GPIO28_OFFSET] 275 | str r2, [r1, #PADS_BANK0_GPIO29_OFFSET] 276 | 277 | // Check watchdog scratch for direct-boot magic numbers 278 | // This is useful in factory test for running ARM code without accessing DAP. 279 | // Probably also useful for imaginative software engineers 280 | // - Scratch 4: 0xb007c0d3 281 | // - Scratch 5: Entry point ^ -0xb007c0d3 282 | // - Scratch 6: Stack pointer 283 | // - Scratch 7: Entry point 284 | check_wdog: 285 | ldr r7, =(WATCHDOG_BASE + WATCHDOG_SCRATCH4_OFFSET) 286 | ldr r6, =(0xb007c0d3) 287 | ldmia r7!, {r0, r1, r2, r3} 288 | cmp r0, r6 289 | bne 1f 290 | eor r1, r3 291 | add r0, r1 292 | bne 1f 293 | // Clear magic number for next time (note -16 because of r7! 
above) 294 | sub r7, #16 295 | str r0, [r7] 296 | // Magic numbers look good, so jump straight into the code that has been prepared for us 297 | msr msp, r2 298 | // Note if this return, we continue with regular boot below 299 | blx r3 300 | // Proceed to main flash boot sequence 301 | 1: 302 | // main does not return 303 | bl main 304 | // b _dead 305 | 306 | // ---------------------------------------------------------------------------- 307 | // Hold/launch code for Core 1 308 | // ---------------------------------------------------------------------------- 309 | // Core 0 will bring core 1 up once it has gone through the sequence of setting 310 | // up flash etc. 311 | // 312 | 313 | send_and_then_again: 314 | // in case of multiple core 1 resets, we can keep pushing and fill the FIFO 315 | // we should wait for an event if the FIFO is full to avoid busy wait 316 | wfe 317 | // takes r0 = word to send, r4 = SIOB_BASE, r5 link register 318 | send_and_then: 319 | ldr r1, [r4, #SIO_FIFO_ST_OFFSET] 320 | lsr r1, #SIO_FIFO_ST_RDY_LSB + 1 321 | bcc send_and_then_again 322 | str r0, [r4, #SIO_FIFO_WR_OFFSET] 323 | sev 324 | add r6, r5, #1 325 | bx r6 326 | 327 | #define M0_BASE (PPB_BASE + M0PLUS_CPUID_OFFSET) 328 | 329 | wait_for_vector: 330 | ldr r4, =SIO_BASE 331 | ldr r7, =M0_BASE 332 | // Enable SCR.SLEEPDEEP before WFE -- this allows NVIC to be fully gated during sleep 333 | mov r1, #M0PLUS_SCR_SLEEPDEEP_BITS 334 | str r1, [r7, #(M0PLUS_SCR_OFFSET - M0PLUS_CPUID_OFFSET)] 335 | // note core_0_handshake_loop is the intended next instruction, but the read is harmless 336 | // as we're about to drain, so don't waste an instruction branching 337 | 1: 338 | ldr r1, [r4, #SIO_FIFO_RD_OFFSET] 339 | core_0_handshake_loop: 340 | // drain the FIFO before sending 0 341 | ldr r1, [r4, #SIO_FIFO_ST_OFFSET] 342 | lsr r1, #SIO_FIFO_ST_VLD_LSB + 1 343 | bcs 1b 344 | 345 | // ...and_then = receive_and_check_zero (which jmps to core_0_handshake_loop on 0) 346 | adr r5, 
receive_and_check_zero 347 | // send 0 348 | mov r0, #0 349 | bl send_and_then 350 | // check for cmd 1 351 | cmp r0, #1 352 | bne core_0_handshake_loop 353 | // ack and receive VTOR 354 | bl send_and_then 355 | str r0, [r7, #(M0PLUS_VTOR_OFFSET - M0PLUS_CPUID_OFFSET)] 356 | // ack and receive SP 357 | bl send_and_then 358 | // initialize 359 | msr msp, r0 360 | bl send_and_then 361 | adr r5, core1_launch 362 | // receive IP (0 sends us back into handshake loop) 363 | bl send_and_then 364 | // nop // .. for alignment 365 | core1_launch: 366 | // Disable SLEEPDEEP before exiting, as it affects wake latency 367 | mov r1, #0 368 | str r1, [r7, #(M0PLUS_SCR_OFFSET - M0PLUS_CPUID_OFFSET)] 369 | blx r0 370 | 371 | // Low power hang on return. Reset the core if you want to provide another entry point 372 | // (There is no need to return though) 373 | // 374 | // alternatively you could return directly to wait_for_vector (available in the function table) 375 | // if you know core 1 is still in a good state 376 | b _dead 377 | 378 | .align 2 379 | // takes r4 = SIOB_BASE 380 | // returns r0 = word received 381 | receive_and_check_zero: 382 | wfe 383 | ldr r0, [r4, #SIO_FIFO_ST_OFFSET] 384 | lsr r0, #SIO_FIFO_ST_VLD_LSB + 1 385 | bcc receive_and_check_zero 386 | 387 | ldr r0, [r4, #SIO_FIFO_RD_OFFSET] 388 | cmp r0, #0 389 | // if we received 0, we reset back to main loop 390 | beq core_0_handshake_loop 391 | bx lr 392 | 393 | .global reset_block_noinline 394 | .type reset_block_noinline,%function 395 | .thumb_func 396 | reset_block_noinline: 397 | ldr r1, =RESETS_BASE + REG_ALIAS_SET_BITS 398 | str r0, [r1] 399 | bx lr 400 | 401 | .global reset_unreset_block_wait_noinline 402 | .type reset_unreset_block_wait_noinline,%function 403 | .thumb_func 404 | reset_unreset_block_wait_noinline: 405 | ldr r1, =RESETS_BASE + REG_ALIAS_SET_BITS 406 | str r0, [r1] 407 | // fall thru 408 | 409 | .global unreset_block_wait_noinline 410 | .type unreset_block_wait_noinline,%function 411 | 
.thumb_func 412 | unreset_block_wait_noinline: 413 | ldr r1, =RESETS_BASE 414 | ldr r2, =RESETS_BASE + REG_ALIAS_CLR_BITS 415 | str r0, [r2] 416 | 1: 417 | ldr r2, [r1, #RESETS_RESET_DONE_OFFSET] 418 | bic r0, r2 419 | bne 1b 420 | bx lr 421 | 422 | // ---------------------------------------------------------------------------- 423 | // Simple debugger trampoline for break-on-return 424 | // ---------------------------------------------------------------------------- 425 | // This is handy for the debugger calling ROM routines without setting hardware 426 | // breakpoints, mainly useful for flash programming. 427 | // Set function address in r7, pass args through r0...r3 as per ABI, 428 | // then jump to this trampoline. 429 | 430 | .global debug_trampoline 431 | .type debug_trampoline,%function 432 | .thumb_func 433 | debug_trampoline: 434 | // Ensure the LSB is set (Thumb Mode) to avoid hardfault 435 | mov r6, #1 436 | orr r7, r7, r6 437 | blx r7 438 | debug_trampoline_end: 439 | bkpt #0 440 | b debug_trampoline 441 | 442 | .byte 0x11, 0x38, 0xc0, 0x7a, 0x00, 0xbd, 0x00, 0xb5 443 | .byte 0x42, 0x40, 0x00, 0x2a, 0x00, 0xf0, 0x02, 0xf8 444 | .byte 0xf6, 0xd2, 0x8e, 0x46, 0x70, 0x46, 0x00, 0x47 445 | zphd: 446 | 447 | soft_float_table_size: 448 | .byte (soft_float_table_end - soft_float_table) / 4 449 | .align 2 450 | soft_float_table: 451 | .word mufp_fadd 452 | .word mufp_fsub 453 | .word mufp_fmul 454 | .word mufp_fdiv 455 | .word mufp_fcmp_fast 456 | .word mufp_fcmp_fast_flags 457 | .word mufp_fsqrt 458 | .word mufp_float2int 459 | .word mufp_float2fix 460 | .word mufp_float2uint 461 | .word mufp_float2ufix 462 | .word mufp_int2float 463 | .word mufp_fix2float 464 | .word mufp_uint2float 465 | .word mufp_ufix2float 466 | .word mufp_fcos 467 | .word mufp_fsin 468 | .word mufp_ftan 469 | .word mufp_fsincos 470 | .word mufp_fexp 471 | .word mufp_fln 472 | 473 | .word mufp_fcmp_combined 474 | .word mufp_fatan2 475 | .word mufp_int642float 476 | .word 
mufp_fix642float 477 | .word mufp_uint642float 478 | .word mufp_ufix642float 479 | .word mufp_float2int64 480 | .word mufp_float2fix64 481 | .word mufp_float2uint64 482 | .word mufp_float2ufix64 483 | .word mufp_float2double 484 | soft_float_table_end: 485 | 486 | soft_double_table: 487 | .word mufp_dadd 488 | .word mufp_dsub 489 | .word mufp_dmul 490 | .word mufp_ddiv 491 | .word mufp_dcmp_fast 492 | .word mufp_dcmp_fast_flags 493 | .word mufp_dsqrt 494 | .word mufp_double2int 495 | .word mufp_double2fix 496 | .word mufp_double2uint 497 | .word mufp_double2ufix 498 | .word mufp_int2double 499 | .word mufp_fix2double 500 | .word mufp_uint2double 501 | .word mufp_ufix2double 502 | .word mufp_dcos 503 | .word mufp_dsin 504 | .word mufp_dtan 505 | .word mufp_dsincos 506 | .word mufp_dexp 507 | .word mufp_dln 508 | 509 | .word mufp_dcmp_combined 510 | .word mufp_datan2 511 | .word mufp_int642double 512 | .word mufp_fix642double 513 | .word mufp_uint642double 514 | .word mufp_ufix642double 515 | .word mufp_double2int64 516 | .word mufp_double2fix64 517 | .word mufp_double2uint64 518 | .word mufp_double2ufix64 519 | .word mufp_double2float 520 | soft_double_table_end: 521 | #if (soft_double_table_end - soft_double_table) != (soft_float_table_end - soft_float_table) 522 | #error FLOAT and DOUBLE table size mismatch 523 | #endif 524 | 525 | #define USB_BOOT_STACK_SIZE 300 526 | 527 | // we clear USB SRAM (aka .bss and stack), and switch stack 528 | .global async_task_worker_thunk 529 | .thumb_func 530 | async_task_worker_thunk: 531 | // set stack 532 | ldr r0, =usb_boot_stack_end 533 | msr MSP, r0 534 | bl async_task_worker 535 | // async_task_worker does not return 536 | 537 | _: 538 | 539 | .section .bss 540 | .align 2 541 | usb_boot_stack: 542 | .space USB_BOOT_STACK_SIZE * 4 543 | usb_boot_stack_end: 544 | -------------------------------------------------------------------------------- /bootrom/info_uf2.txt: 
-------------------------------------------------------------------------------- 1 | UF2 Bootloader v3.0 2 | Model: Raspberry Pi RP2 3 | Board-ID: RPI-RP2 4 | -------------------------------------------------------------------------------- /bootrom/program_flash_generic.c: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2020 Raspberry Pi (Trading) Ltd. 3 | * 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | */ 6 | 7 | #include "hardware/regs/io_qspi.h" 8 | #include "hardware/regs/pads_qspi.h" 9 | #include "hardware/structs/ssi.h" 10 | #include "hardware/structs/xip_ctrl.h" 11 | #include "hardware/resets.h" 12 | #include "program_flash_generic.h" 13 | #include "resets.h" 14 | 15 | // These are supported by almost any SPI flash 16 | #define FLASHCMD_PAGE_PROGRAM 0x02 17 | #define FLASHCMD_READ_DATA 0x03 18 | #define FLASHCMD_READ_STATUS 0x05 19 | #define FLASHCMD_WRITE_ENABLE 0x06 20 | #define FLASHCMD_SECTOR_ERASE 0x20 21 | #define FLASHCMD_READ_SFDP 0x5a 22 | #define FLASHCMD_READ_JEDEC_ID 0x9f 23 | 24 | // Annoyingly, structs give much better code generation, as they re-use the base 25 | // pointer rather than doing a PC-relative load for each constant pointer. 
26 | 27 | static ssi_hw_t *const ssi = (ssi_hw_t *) XIP_SSI_BASE; 28 | 29 | // Sanity check 30 | #undef static_assert 31 | #define static_assert(cond, x) extern int static_assert[(cond)?1:-1] 32 | check_hw_layout(ssi_hw_t, ssienr, SSI_SSIENR_OFFSET); 33 | check_hw_layout(ssi_hw_t, spi_ctrlr0, SSI_SPI_CTRLR0_OFFSET); 34 | 35 | // ---------------------------------------------------------------------------- 36 | // Setup and generic access functions 37 | 38 | // Connect the XIP controller to the flash pads 39 | void __noinline connect_internal_flash() { 40 | // Use hard reset to force IO and pad controls to known state (don't touch 41 | // IO_BANK0 as that does not affect XIP signals) 42 | reset_unreset_block_wait_noinline(RESETS_RESET_IO_QSPI_BITS | RESETS_RESET_PADS_QSPI_BITS); 43 | 44 | // Then mux XIP block onto internal QSPI flash pads 45 | io_rw_32 *iobank1 = (io_rw_32 *) IO_QSPI_BASE; 46 | #ifndef GENERAL_SIZE_HACKS 47 | for (int i = 0; i < 6; ++i) 48 | iobank1[2 * i + 1] = 0; 49 | #else 50 | __asm volatile ( 51 | "str %1, [%0, #4];" \ 52 | "str %1, [%0, #12];" \ 53 | "str %1, [%0, #20];" \ 54 | "str %1, [%0, #28];" \ 55 | "str %1, [%0, #36];" \ 56 | "str %1, [%0, #44];" 57 | ::"r" (iobank1), "r" (0)); 58 | #endif 59 | } 60 | 61 | // Set up the SSI controller for standard SPI mode,i.e. for every byte sent we get one back 62 | // This is only called by flash_exit_xip(), not by any of the other functions. 63 | // This makes it possible for the debugger or user code to edit SPI settings 64 | // e.g. baud rate, CPOL/CPHA. 
65 | void flash_init_spi() { 66 | // Disable SSI for further config 67 | ssi->ssienr = 0; 68 | // Clear sticky errors (clear-on-read) 69 | (void) ssi->sr; 70 | (void) ssi->icr; 71 | // Hopefully-conservative baud rate for boot and programming 72 | ssi->baudr = 6; 73 | ssi->ctrlr0 = 74 | (SSI_CTRLR0_SPI_FRF_VALUE_STD << SSI_CTRLR0_SPI_FRF_LSB) | // Standard 1-bit SPI serial frames 75 | (7 << SSI_CTRLR0_DFS_32_LSB) | // 8 clocks per data frame 76 | (SSI_CTRLR0_TMOD_VALUE_TX_AND_RX << SSI_CTRLR0_TMOD_LSB); // TX and RX FIFOs are both used for every byte 77 | // Slave selected when transfers in progress 78 | ssi->ser = 1; 79 | // Re-enable 80 | ssi->ssienr = 1; 81 | } 82 | 83 | typedef enum { 84 | OUTOVER_NORMAL = 0, 85 | OUTOVER_INVERT, 86 | OUTOVER_LOW, 87 | OUTOVER_HIGH 88 | } outover_t; 89 | 90 | // Flash code may be heavily interrupted (e.g. if we are running USB MSC 91 | // handlers concurrently with flash programming) so we control the CS pin 92 | // manually 93 | static void __noinline flash_cs_force(outover_t over) { 94 | io_rw_32 *reg = (io_rw_32 *) (IO_QSPI_BASE + IO_QSPI_GPIO_QSPI_SS_CTRL_OFFSET); 95 | #ifndef GENERAL_SIZE_HACKS 96 | *reg = *reg & ~IO_QSPI_GPIO_QSPI_SS_CTRL_OUTOVER_BITS 97 | | (over << IO_QSPI_GPIO_QSPI_SS_CTRL_OUTOVER_LSB); 98 | #else 99 | // The only functions we want are FSEL (== 0 for XIP) and OUTOVER! 100 | *reg = over << IO_QSPI_GPIO_QSPI_SS_CTRL_OUTOVER_LSB; 101 | #endif 102 | // Read to flush async bridge 103 | (void) *reg; 104 | } 105 | 106 | // Put bytes from one buffer, and get bytes into another buffer. 107 | // These can be the same buffer. 108 | // If tx is NULL then send zeroes. 109 | // If rx is NULL then all read data will be dropped. 110 | // 111 | // If rx_skip is nonzero, this many bytes will first be consumed from the FIFO, 112 | // before reading a further count bytes into *rx. 113 | // E.g. if you have written a command+address just before calling this function. 
114 | void __noinline flash_put_get(const uint8_t *tx, uint8_t *rx, size_t count, size_t rx_skip) { 115 | // Make sure there is never more data in flight than the depth of the RX 116 | // FIFO. Otherwise, when we are interrupted for long periods, hardware 117 | // will overflow the RX FIFO. 118 | const uint max_in_flight = 16 - 2; // account for data internal to SSI 119 | size_t tx_count = count; 120 | size_t rx_count = count; 121 | while (tx_count || rx_skip || rx_count) { 122 | // NB order of reads, for pessimism rather than optimism 123 | uint32_t tx_level = ssi_hw->txflr; 124 | uint32_t rx_level = ssi_hw->rxflr; 125 | bool did_something = false; // Expect this to be folded into control flow, not register 126 | if (tx_count && tx_level + rx_level < max_in_flight) { 127 | ssi->dr0 = (uint32_t) (tx ? *tx++ : 0); 128 | --tx_count; 129 | did_something = true; 130 | } 131 | if (rx_level) { 132 | uint8_t rxbyte = ssi->dr0; 133 | did_something = true; 134 | if (rx_skip) { 135 | --rx_skip; 136 | } else { 137 | if (rx) 138 | *rx++ = rxbyte; 139 | --rx_count; 140 | } 141 | } 142 | // APB load costs 4 cycles, so only do it on idle loops (our budget is 48 cyc/byte) 143 | if (!did_something && __builtin_expect(flash_was_aborted(), 0)) 144 | break; 145 | } 146 | flash_cs_force(OUTOVER_HIGH); 147 | } 148 | 149 | // Convenience wrapper for above 150 | // (And it's hard for the debug host to get the tight timing between 151 | // cmd DR0 write and the remaining data) 152 | void flash_do_cmd(uint8_t cmd, const uint8_t *tx, uint8_t *rx, size_t count) { 153 | flash_cs_force(OUTOVER_LOW); 154 | ssi->dr0 = cmd; 155 | flash_put_get(tx, rx, count, 1); 156 | } 157 | 158 | 159 | // Timing of this one is critical, so do not expose the symbol to debugger etc 160 | static inline void flash_put_cmd_addr(uint8_t cmd, uint32_t addr) { 161 | flash_cs_force(OUTOVER_LOW); 162 | addr |= cmd << 24; 163 | for (int i = 0; i < 4; ++i) { 164 | ssi->dr0 = addr >> 24; 165 | addr <<= 8; 166 | } 167 | } 168 
| 169 | // GCC produces some heinous code if we try to loop over the pad controls, 170 | // so structs it is 171 | struct sd_padctrl { 172 | io_rw_32 sd0; 173 | io_rw_32 sd1; 174 | io_rw_32 sd2; 175 | io_rw_32 sd3; 176 | }; 177 | 178 | // Sequence: 179 | // 1. CSn = 1, IO = 4'h0 (via pulldown to avoid contention), x32 clocks 180 | // 2. CSn = 0, IO = 4'hf (via pullup to avoid contention), x32 clocks 181 | // 3. CSn = 1 (brief deassertion) 182 | // 4. CSn = 0, MOSI = 1'b1 driven, x16 clocks 183 | // 184 | // Part 4 is the sequence suggested in W25X10CL datasheet. 185 | // Parts 1 and 2 are to improve compatibility with Micron parts 186 | 187 | void __noinline flash_exit_xip() { 188 | struct sd_padctrl *qspi_sd_padctrl = (struct sd_padctrl *) (PADS_QSPI_BASE + PADS_QSPI_GPIO_QSPI_SD0_OFFSET); 189 | io_rw_32 *qspi_ss_ioctrl = (io_rw_32 *) (IO_QSPI_BASE + IO_QSPI_GPIO_QSPI_SS_CTRL_OFFSET); 190 | uint8_t buf[2]; 191 | buf[0] = 0xff; 192 | buf[1] = 0xff; 193 | 194 | flash_init_spi(); 195 | 196 | uint32_t padctrl_save = qspi_sd_padctrl->sd0; 197 | uint32_t padctrl_tmp = (padctrl_save 198 | & ~(PADS_QSPI_GPIO_QSPI_SD0_OD_BITS | PADS_QSPI_GPIO_QSPI_SD0_PUE_BITS | 199 | PADS_QSPI_GPIO_QSPI_SD0_PDE_BITS) 200 | ) | PADS_QSPI_GPIO_QSPI_SD0_OD_BITS | PADS_QSPI_GPIO_QSPI_SD0_PDE_BITS; 201 | 202 | // First two 32-clock sequences 203 | // CSn is held high for the first 32 clocks, then asserted low for next 32 204 | flash_cs_force(OUTOVER_HIGH); 205 | for (int i = 0; i < 2; ++i) { 206 | // This gives 4 16-bit offset store instructions. 
Anything else seems to 207 | // produce a large island of constants 208 | qspi_sd_padctrl->sd0 = padctrl_tmp; 209 | qspi_sd_padctrl->sd1 = padctrl_tmp; 210 | qspi_sd_padctrl->sd2 = padctrl_tmp; 211 | qspi_sd_padctrl->sd3 = padctrl_tmp; 212 | 213 | // Brief delay (~6000 cyc) for pulls to take effect 214 | uint32_t delay_cnt = 1u << 11; 215 | asm volatile ( 216 | "1: \n\t" 217 | "sub %0, %0, #1 \n\t" 218 | "bne 1b" 219 | : "+r" (delay_cnt) 220 | ); 221 | 222 | flash_put_get(NULL, NULL, 4, 0); 223 | 224 | padctrl_tmp = (padctrl_tmp 225 | & ~PADS_QSPI_GPIO_QSPI_SD0_PDE_BITS) 226 | | PADS_QSPI_GPIO_QSPI_SD0_PUE_BITS; 227 | 228 | flash_cs_force(OUTOVER_LOW); 229 | } 230 | 231 | // Restore IO/pad controls, and send 0xff, 0xff. Put pullup on IO2/IO3 as 232 | // these may be used as WPn/HOLDn at this point, and we are now starting 233 | // to issue serial commands. 234 | 235 | qspi_sd_padctrl->sd0 = padctrl_save; 236 | qspi_sd_padctrl->sd1 = padctrl_save; 237 | padctrl_save = (padctrl_save 238 | & ~PADS_QSPI_GPIO_QSPI_SD0_PDE_BITS 239 | ) | PADS_QSPI_GPIO_QSPI_SD0_PUE_BITS; 240 | qspi_sd_padctrl->sd2 = padctrl_save; 241 | qspi_sd_padctrl->sd3 = padctrl_save; 242 | 243 | flash_cs_force(OUTOVER_LOW); 244 | flash_put_get(buf, NULL, 2, 0); 245 | 246 | *qspi_ss_ioctrl = 0; 247 | } 248 | 249 | // ---------------------------------------------------------------------------- 250 | // Programming 251 | 252 | // Poll the flash status register until the busy bit (LSB) clears 253 | static inline void flash_wait_ready() { 254 | uint8_t stat; 255 | do { 256 | flash_do_cmd(FLASHCMD_READ_STATUS, NULL, &stat, 1); 257 | } while (stat & 0x1 && !flash_was_aborted()); 258 | } 259 | 260 | // Set the WEL bit (needed before any program/erase operation) 261 | static __noinline void flash_enable_write() { 262 | flash_do_cmd(FLASHCMD_WRITE_ENABLE, NULL, NULL, 0); 263 | } 264 | 265 | // Program a 256 byte page at some 256-byte-aligned flash address, 266 | // from some buffer in memory. 
Blocks until completion. 267 | void flash_page_program(uint32_t addr, const uint8_t *data) { 268 | assert(addr < 0x1000000); 269 | assert(!(addr & 0xffu)); 270 | flash_enable_write(); 271 | flash_put_cmd_addr(FLASHCMD_PAGE_PROGRAM, addr); 272 | flash_put_get(data, NULL, 256, 4); 273 | flash_wait_ready(); 274 | } 275 | 276 | // Program a range of flash with some data from memory. 277 | // Size is rounded up to nearest 256 bytes. 278 | void __noinline flash_range_program(uint32_t addr, const uint8_t *data, size_t count) { 279 | assert(!(addr & 0xffu)); 280 | uint32_t goal = addr + count; 281 | while (addr < goal && !flash_was_aborted()) { 282 | flash_page_program(addr, data); 283 | addr += 256; 284 | data += 256; 285 | } 286 | } 287 | 288 | // Force MISO input to SSI low so that an in-progress SR polling loop will 289 | // fall through. This is needed when a flash programming task in async task 290 | // context is locked up (e.g. if there is no flash device, and a hard pullup 291 | // on MISO pin -> SR read gives 0xff) and the host issues an abort in IRQ 292 | // context. Bit of a hack 293 | void flash_abort() { 294 | hw_set_bits( 295 | (io_rw_32 *) (IO_QSPI_BASE + IO_QSPI_GPIO_QSPI_SD1_CTRL_OFFSET), 296 | IO_QSPI_GPIO_QSPI_SD1_CTRL_INOVER_VALUE_LOW << IO_QSPI_GPIO_QSPI_SD1_CTRL_INOVER_LSB 297 | ); 298 | } 299 | 300 | // Also allow any unbounded loops to check whether the above abort condition 301 | // was asserted, and terminate early 302 | int flash_was_aborted() { 303 | return *(io_rw_32 *) (IO_QSPI_BASE + IO_QSPI_GPIO_QSPI_SD1_CTRL_OFFSET) 304 | & IO_QSPI_GPIO_QSPI_SD1_CTRL_INOVER_BITS; 305 | } 306 | 307 | // ---------------------------------------------------------------------------- 308 | // Erase 309 | 310 | // Setting correct address alignment is the caller's responsibility 311 | 312 | // Use some other command, supplied by user e.g. a block erase or a chip erase. 313 | // Despite the name, the user is not erased by this function. 
314 | void flash_user_erase(uint32_t addr, uint8_t cmd) { 315 | assert(addr < 0x1000000); 316 | flash_enable_write(); 317 | flash_put_cmd_addr(cmd, addr); 318 | flash_put_get(NULL, NULL, 0, 4); 319 | flash_wait_ready(); 320 | } 321 | 322 | // Use a standard 20h 4k erase command: 323 | void flash_sector_erase(uint32_t addr) { 324 | flash_user_erase(addr, FLASHCMD_SECTOR_ERASE); 325 | } 326 | 327 | // block_size must be a power of 2. 328 | // Generally block_size > 4k, and block_cmd is some command which erases a block 329 | // of this size. This accelerates erase speed. 330 | // To use sector-erase only, set block_size to some value larger than flash, 331 | // e.g. 1ul << 31. 332 | // To override the default 20h erase cmd, set block_size == 4k. 333 | void __noinline flash_range_erase(uint32_t addr, size_t count, uint32_t block_size, uint8_t block_cmd) { 334 | uint32_t goal = addr + count; 335 | while (addr < goal && !flash_was_aborted()) { 336 | if (!(addr & (block_size - 1)) && goal - addr >= block_size) { 337 | flash_user_erase(addr, block_cmd); 338 | addr += block_size; 339 | } else { 340 | flash_sector_erase(addr); 341 | addr += 1ul << 12; 342 | } 343 | } 344 | } 345 | 346 | // ---------------------------------------------------------------------------- 347 | // Read 348 | 349 | void __noinline flash_read_data(uint32_t addr, uint8_t *rx, size_t count) { 350 | assert(addr < 0x1000000); 351 | flash_put_cmd_addr(FLASHCMD_READ_DATA, addr); 352 | flash_put_get(NULL, rx, count, 4); 353 | } 354 | 355 | // ---------------------------------------------------------------------------- 356 | // Size determination via SFDP or JEDEC ID (best effort) 357 | // Relevant XKCD is 927 358 | 359 | static inline void flash_read_sfdp(uint32_t addr, uint8_t *rx, size_t count) { 360 | assert(addr < 0x1000000); 361 | flash_put_cmd_addr(FLASHCMD_READ_SFDP, addr); 362 | ssi->dr0 = 0; // dummy byte 363 | flash_put_get(NULL, rx, count, 5); 364 | } 365 | 366 | static inline 
__attribute__((always_inline)) uint32_t bytes_to_u32le(const uint8_t *b) { 367 | return b[0] | (b[1] << 8) | (b[2] << 16) | (b[3] << 24); 368 | } 369 | 370 | // Return value >= 0: log 2 of flash size in bytes. 371 | // Return value < 0: unable to determine size. 372 | int __noinline flash_size_log2() { 373 | uint8_t rxbuf[16]; 374 | 375 | // Check magic 376 | flash_read_sfdp(0, rxbuf, 16); 377 | if (bytes_to_u32le(rxbuf) != ('S' | ('F' << 8) | ('D' << 16) | ('P' << 24))) 378 | goto sfdp_fail; 379 | // Skip NPH -- we don't care about nonmandatory parameters. 380 | // Offset 8 is header for mandatory parameter table 381 | // | ID | MinRev | MajRev | Length in words | ptr[2] | ptr[1] | ptr[0] | unused| 382 | // ID must be 0 (JEDEC) for mandatory PTH 383 | if (rxbuf[8] != 0) 384 | goto sfdp_fail; 385 | 386 | uint32_t param_table_ptr = bytes_to_u32le(rxbuf + 12) & 0xffffffu; 387 | flash_read_sfdp(param_table_ptr, rxbuf, 8); 388 | uint32_t array_size_word = bytes_to_u32le(rxbuf + 4); 389 | // MSB set: array >= 2 Gbit, encoded as log2 of number of bits 390 | // MSB clear: array < 2 Gbit, encoded as direct bit count 391 | if (array_size_word & (1u << 31)) { 392 | array_size_word &= ~(1u << 31); 393 | } else { 394 | uint32_t ctr = 0; 395 | array_size_word += 1; 396 | while (array_size_word >>= 1) 397 | ++ctr; 398 | array_size_word = ctr; 399 | } 400 | // Sanity check... 2kbit is minimum for 2nd stage, 128 Gbit is 1000x bigger than we can XIP 401 | if (array_size_word < 11 || array_size_word > 37) 402 | goto sfdp_fail; 403 | return array_size_word - 3; 404 | 405 | sfdp_fail: 406 | // If no SFDP, it's common to encode log2 of main array size in second 407 | // byte of JEDEC ID 408 | flash_do_cmd(FLASHCMD_READ_JEDEC_ID, NULL, rxbuf, 3); 409 | uint8_t array_size_byte = rxbuf[2]; 410 | // Confusingly this is log2 of size in bytes, not bits like SFDP. 
Sanity check: 411 | if (array_size_byte < 8 || array_size_byte > 34) 412 | goto jedec_id_fail; 413 | return array_size_byte; 414 | 415 | jedec_id_fail: 416 | return -1; 417 | } 418 | 419 | // ---------------------------------------------------------------------------- 420 | // XIP Entry 421 | 422 | // This is a hook for steps to be taken in between programming the flash and 423 | // doing cached XIP reads from the flash. Called by the bootrom before 424 | // entering flash second stage, and called by the debugger after flash 425 | // programming. 426 | void __noinline flash_flush_cache() { 427 | xip_ctrl_hw->flush = 1; 428 | // Read blocks until flush completion 429 | (void) xip_ctrl_hw->flush; 430 | // Enable the cache 431 | hw_set_bits(&xip_ctrl_hw->ctrl, XIP_CTRL_EN_BITS); 432 | flash_cs_force(OUTOVER_NORMAL); 433 | } 434 | 435 | // Put the SSI into a mode where XIP accesses translate to standard 436 | // serial 03h read commands. The flash remains in its default serial command 437 | // state, so will still respond to other commands. 
438 | void __noinline flash_enter_cmd_xip() { 439 | ssi->ssienr = 0; 440 | ssi->ctrlr0 = 441 | (SSI_CTRLR0_SPI_FRF_VALUE_STD << SSI_CTRLR0_SPI_FRF_LSB) | // Standard 1-bit SPI serial frames 442 | (31 << SSI_CTRLR0_DFS_32_LSB) | // 32 clocks per data frame 443 | (SSI_CTRLR0_TMOD_VALUE_EEPROM_READ << SSI_CTRLR0_TMOD_LSB); // Send instr + addr, receive data 444 | ssi->spi_ctrlr0 = 445 | (FLASHCMD_READ_DATA << SSI_SPI_CTRLR0_XIP_CMD_LSB) | // Standard 03h read 446 | (2u << SSI_SPI_CTRLR0_INST_L_LSB) | // 8-bit instruction prefix 447 | (6u << SSI_SPI_CTRLR0_ADDR_L_LSB) | // 24-bit addressing for 03h commands 448 | (SSI_SPI_CTRLR0_TRANS_TYPE_VALUE_1C1A // Command and address both in serial format 449 | << SSI_SPI_CTRLR0_TRANS_TYPE_LSB); 450 | ssi->ssienr = 1; 451 | } 452 | -------------------------------------------------------------------------------- /bootrom/program_flash_generic.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2020 Raspberry Pi (Trading) Ltd. 
3 | * 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | */ 6 | 7 | #ifndef _PROGRAM_FLASH_GENERIC_H 8 | #define _PROGRAM_FLASH_GENERIC_H 9 | 10 | #include 11 | #include 12 | 13 | void connect_internal_flash(); 14 | void flash_init_spi(); 15 | void flash_put_get(const uint8_t *tx, uint8_t *rx, size_t count, size_t rx_skip); 16 | void flash_do_cmd(uint8_t cmd, const uint8_t *tx, uint8_t *rx, size_t count); 17 | void flash_exit_xip(); 18 | void flash_page_program(uint32_t addr, const uint8_t *data); 19 | void flash_range_program(uint32_t addr, const uint8_t *data, size_t count); 20 | void flash_sector_erase(uint32_t addr); 21 | void flash_user_erase(uint32_t addr, uint8_t cmd); 22 | void flash_range_erase(uint32_t addr, size_t count, uint32_t block_size, uint8_t block_cmd); 23 | void flash_read_data(uint32_t addr, uint8_t *rx, size_t count); 24 | int flash_size_log2(); 25 | void flash_flush_cache(); 26 | void flash_enter_cmd_xip(); 27 | void flash_abort(); 28 | int flash_was_aborted(); 29 | 30 | #endif // _PROGRAM_FLASH_GENERIC_H_ 31 | -------------------------------------------------------------------------------- /bootrom/resets.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2020 Raspberry Pi (Trading) Ltd. 3 | * 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | */ 6 | 7 | #ifndef _RESETS_H 8 | #define _RESETS_H 9 | 10 | #include "pico/types.h" 11 | 12 | void reset_block_noinline(uint32_t mask); 13 | void unreset_block_wait_noinline(uint32_t mask); 14 | void reset_unreset_block_wait_noinline(uint32_t mask); 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /bootrom/sftable.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2020 Raspberry Pi (Trading) Ltd. 
3 | * 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | */ 6 | 7 | #ifndef _SFTABLE_H 8 | #define _SFTABLE_H 9 | 10 | #define SF_TABLE_FADD 0x00 11 | #define SF_TABLE_FSUB 0x04 12 | #define SF_TABLE_FMUL 0x08 13 | #define SF_TABLE_FDIV 0x0c 14 | #define SF_TABLE_FCMP 0x10 15 | #define SF_TABLE_FCMP_FLAGS 0x14 16 | #define SF_TABLE_FSQRT 0x18 17 | #define SF_TABLE_FLOAT2INT 0x1c 18 | #define SF_TABLE_FLOAT2FIX 0x20 19 | #define SF_TABLE_FLOAT2UINT 0x24 20 | #define SF_TABLE_FLOAT2UFIX 0x28 21 | #define SF_TABLE_INT2FLOAT 0x2c 22 | #define SF_TABLE_FIX2FLOAT 0x30 23 | #define SF_TABLE_UINT2FLOAT 0x34 24 | #define SF_TABLE_UFIX2FLOAT 0x38 25 | #define SF_TABLE_FCOS 0x3c 26 | #define SF_TABLE_FSIN 0x40 27 | #define SF_TABLE_FTAN 0x44 28 | #define SF_TABLE_FATAN2 0x48 29 | #define SF_TABLE_FEXP 0x4c 30 | #define SF_TABLE_FLN 0x50 31 | 32 | #define SF_TABLE_SIZE 0x54 33 | 34 | #endif 35 | -------------------------------------------------------------------------------- /bootrom/usb_boot_device.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2020 Raspberry Pi (Trading) Ltd. 3 | * 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | */ 6 | 7 | #ifndef _USB_BOOT_DEVICE_H 8 | #define _USB_BOOT_DEVICE_H 9 | 10 | #include "usb_device.h" 11 | 12 | #define VENDOR_ID 0x2e8au 13 | #define PRODUCT_ID 0x0003u 14 | 15 | void usb_boot_device_init(uint32_t _usb_disable_interface_mask); 16 | 17 | void safe_reboot(uint32_t addr, uint32_t sp, uint32_t delay_ms); 18 | 19 | // note these are inclusive to save - 1 checks... we always test the start and end of a range, so the range would have to be zero length which we don't use 20 | static inline bool is_address_ram(uint32_t addr) { 21 | // todo allow access to parts of USB ram? 22 | return (addr >= SRAM_BASE && addr <= SRAM_END) || 23 | (addr >= XIP_SRAM_BASE && addr <= XIP_SRAM_END); 24 | } 25 | 26 | static inline bool is_address_flash(uint32_t addr) { 27 | // todo maybe smaller? 
28 | return (addr >= XIP_MAIN_BASE && addr <= SRAM_BASE); 29 | } 30 | 31 | static inline bool is_address_rom(uint32_t addr) { 32 | return addr < 8192; 33 | } 34 | 35 | // zero terminated 36 | extern char serial_number_string[13]; 37 | 38 | #endif //_USB_BOOT_DEVICE_H 39 | -------------------------------------------------------------------------------- /bootrom/virtual_disk.c: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2020 Raspberry Pi (Trading) Ltd. 3 | * 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | */ 6 | 7 | #include "runtime.h" 8 | #include "usb_boot_device.h" 9 | #include "virtual_disk.h" 10 | #include "boot/uf2.h" 11 | #include "scsi.h" 12 | #include "usb_msc.h" 13 | #include "async_task.h" 14 | #include "generated.h" 15 | 16 | // Fri, 05 Sep 2008 16:20:51 17 | #define RASPBERRY_PI_TIME_FRAC 100 18 | #define RASPBERRY_PI_TIME ((16u << 11u) | (20u << 5u) | (51u >> 1u)) 19 | #define RASPBERRY_PI_DATE ((28u << 9u) | (9u << 5u) | (5u)) 20 | //#define NO_PARTITION_TABLE 21 | 22 | #define CLUSTER_SIZE (4096u * CLUSTER_UP_MUL) 23 | #define CLUSTER_SHIFT (3u + CLUSTER_UP_SHIFT) 24 | static_assert(CLUSTER_SIZE == SECTOR_SIZE << CLUSTER_SHIFT, ""); 25 | 26 | #define CLUSTER_COUNT (VOLUME_SIZE / CLUSTER_SIZE) 27 | 28 | static_assert(CLUSTER_COUNT <= 65526, "FAT16 limit"); 29 | 30 | #ifdef NO_PARTITION_TABLE 31 | #define VOLUME_SECTOR_COUNT SECTOR_COUNT 32 | #else 33 | #define VOLUME_SECTOR_COUNT (SECTOR_COUNT-1) 34 | #endif 35 | 36 | #define FAT_COUNT 2u 37 | #define MAX_ROOT_DIRECTORY_ENTRIES 512 38 | #define ROOT_DIRECTORY_SECTORS (MAX_ROOT_DIRECTORY_ENTRIES * 32u / SECTOR_SIZE) 39 | 40 | #define lsb_hword(x) (((uint)(x)) & 0xffu), ((((uint)(x))>>8u)&0xffu) 41 | #define lsb_word(x) (((uint)(x)) & 0xffu), ((((uint)(x))>>8u)&0xffu), ((((uint)(x))>>16u)&0xffu), ((((uint)(x))>>24u)&0xffu) 42 | 43 | #define SECTORS_PER_FAT (2 * (CLUSTER_COUNT + SECTOR_SIZE - 1) / SECTOR_SIZE) 44 | 
static_assert(SECTORS_PER_FAT < 65536, ""); 45 | 46 | static_assert(VOLUME_SIZE >= 16 * 1024 * 1024, "volume too small for fat16"); 47 | 48 | // we are a hard drive - SCSI inquiry defines removability 49 | #define IS_REMOVABLE_MEDIA false 50 | #define MEDIA_TYPE (IS_REMOVABLE_MEDIA ? 0xf0u : 0xf8u) 51 | 52 | #define MAX_RAM_UF2_BLOCKS 1280 53 | static_assert(MAX_RAM_UF2_BLOCKS >= ((SRAM_END - SRAM_BASE) + (XIP_SRAM_END - XIP_SRAM_BASE)) / 256, ""); 54 | 55 | static __attribute__((aligned(4))) uint32_t uf2_valid_ram_blocks[(MAX_RAM_UF2_BLOCKS + 31) / 32]; 56 | 57 | enum partition_type { 58 | PT_FAT12 = 1, 59 | PT_FAT16 = 4, 60 | PT_FAT16_LBA = 0xe, 61 | }; 62 | 63 | static const uint8_t boot_sector[] = { 64 | // 00 here should mean not bootable (according to spec) -- still windows unhappy without it 65 | 0xeb, 0x3c, 0x90, 66 | // 03 id 67 | 'M', 'S', 'W', 'I', 'N', '4', '.', '1', 68 | // 'U', 'F', '2', ' ', 'U', 'F', '2', ' ', 69 | // 0b bytes per sector 70 | lsb_hword(512), 71 | // 0d sectors per cluster 72 | (CLUSTER_SIZE / SECTOR_SIZE), 73 | // 0e reserved sectors 74 | lsb_hword(1), 75 | // 10 fat count 76 | FAT_COUNT, 77 | // 11 max number root entries 78 | lsb_hword(MAX_ROOT_DIRECTORY_ENTRIES), 79 | // 13 number of sectors, if < 32768 80 | #if VOLUME_SECTOR_COUNT < 65536 81 | lsb_hword(VOLUME_SECTOR_COUNT), 82 | #else 83 | lsb_hword(0), 84 | #endif 85 | // 15 media descriptor 86 | MEDIA_TYPE, 87 | // 16 sectors per FAT 88 | lsb_hword(SECTORS_PER_FAT), 89 | // 18 sectors per track (non LBA) 90 | lsb_hword(1), 91 | // 1a heads (non LBA) 92 | lsb_hword(1), 93 | // 1c hidden sectors 1 for MBR 94 | lsb_word(SECTOR_COUNT - VOLUME_SECTOR_COUNT), 95 | // 20 sectors if >32K 96 | #if VOLUME_SECTOR_COUNT >= 65536 97 | lsb_word(VOLUME_SECTOR_COUNT), 98 | #else 99 | lsb_word(0), 100 | #endif 101 | // 24 drive number 102 | 0, 103 | // 25 reserved (seems to be chkdsk flag for clean unmount - linux writes 1) 104 | 0, 105 | // 26 extended boot sig 106 | 0x29, 107 | // 27 serial 
number 108 | 0, 0, 0, 0, 109 | // 2b label 110 | 'R', 'P', 'I', '-', 'R', 'P', '2', ' ', ' ', ' ', ' ', 111 | 'F', 'A', 'T', '1', '6', ' ', ' ', ' ', 112 | 0xeb, 0xfe // while(1); 113 | }; 114 | static_assert(sizeof(boot_sector) == 0x40, ""); 115 | 116 | #define BOOT_OFFSET_SERIAL_NUMBER 0x27 117 | #define BOOT_OFFSET_LABEL 0x2b 118 | 119 | #define ATTR_READONLY 0x01u 120 | #define ATTR_HIDDEN 0x02u 121 | #define ATTR_SYSTEM 0x04u 122 | #define ATTR_VOLUME_LABEL 0x08u 123 | #define ATTR_DIR 0x10u 124 | #define ATTR_ARCHIVE 0x20u 125 | 126 | #define MBR_OFFSET_SERIAL_NUMBER 0x1b8 127 | 128 | struct dir_entry { 129 | uint8_t name[11]; 130 | uint8_t attr; 131 | uint8_t reserved; 132 | uint8_t creation_time_frac; 133 | uint16_t creation_time; 134 | uint16_t creation_date; 135 | uint16_t last_access_date; 136 | uint16_t cluster_hi; 137 | uint16_t last_modified_time; 138 | uint16_t last_modified_date; 139 | uint16_t cluster_lo; 140 | uint32_t size; 141 | }; 142 | 143 | static_assert(sizeof(struct dir_entry) == 32, ""); 144 | 145 | static struct uf2_info { 146 | uint32_t *valid_blocks; 147 | uint32_t max_valid_blocks; 148 | uint32_t *cleared_pages; 149 | uint32_t max_cleared_pages; 150 | uint32_t num_blocks; 151 | uint32_t token; 152 | uint32_t valid_block_count; 153 | uint32_t lowest_addr; 154 | uint32_t block_no; 155 | struct async_task next_task; 156 | bool ram; 157 | } _uf2_info; 158 | 159 | // --- start non IRQ code --- 160 | 161 | static void _write_uf2_page_complete(struct async_task *task) { 162 | if (task->token == _uf2_info.token) { 163 | if (!task->result && _uf2_info.valid_block_count == _uf2_info.num_blocks) { 164 | safe_reboot(_uf2_info.ram ? _uf2_info.lowest_addr : 0, SRAM_END, 1000); //300); // reboot in 300 ms 165 | } 166 | } 167 | vd_async_complete(task->token, task->result); 168 | } 169 | 170 | // return true for async 171 | static bool _write_uf2_page() { 172 | // If we need to write a page (i.e. 
it hasn't been written before, then we queue a task to do that asynchronously 173 | // 174 | // Note that in an ideal world, given that we aren't synchronizing with the task in any way from here on, 175 | // we'd hand that task an immutable work item so that we don't step on the task's toes later. 176 | // 177 | // In the constrained bootrom (no RAM use) environment we don't have space to do that, so instead we pass 178 | // it a work item which is immutable except for the data buffer to be written. 179 | // 180 | // Note that we also pre-update all _uf2_info state in anticipation of the write being completed. This saves us 181 | // doing some extra figuring in _write_uf2_page_complete later, and there are only two cases we care about 182 | // 183 | // 1) that the task fails, in which case we'll notice in _write_uf2_page_complete anyway, and we can reset. 184 | // 2) that we superseded what the task was doing with a new UF2 download, in which case the old state is irrelevant. 185 | // 186 | // So basically the rule is, that this method (and _write_uf2_page_complete) which are both called under our 187 | // pseudo-lock (i.e. during IRQ or with IRQs disabled) are the onlu things that touch UF2 tracking state... 188 | // the task just takes an immutable command (with possibly mutable data), and takes care of writing that data to FLASH or RAM 189 | // along with erase etc. 
190 | usb_debug("_write_uf2_page tok %d block %d / %d\n", (int) _uf2_info.token, _uf2_info.block_no, 191 | (int) _uf2_info.info.num_blocks); 192 | uint block_offset = _uf2_info.block_no / 32; 193 | uint32_t block_mask = 1u << (_uf2_info.block_no & 31u); 194 | if (!(_uf2_info.valid_blocks[block_offset] & block_mask)) { 195 | // note we don't want to pick XIP_CACHE over RAM even though it has a lower address 196 | bool xip_cache_next = _uf2_info.next_task.transfer_addr < SRAM_BASE; 197 | bool xip_cache_lowest = _uf2_info.lowest_addr < SRAM_BASE; 198 | if ((_uf2_info.next_task.transfer_addr < _uf2_info.lowest_addr && xip_cache_next == xip_cache_lowest) || 199 | (xip_cache_lowest && !xip_cache_next)) { 200 | _uf2_info.lowest_addr = _uf2_info.next_task.transfer_addr; 201 | } 202 | if (_uf2_info.ram) { 203 | assert(_uf2_info.next_task.transfer_addr); 204 | } else { 205 | uint page_no = _uf2_info.block_no * 256 / FLASH_SECTOR_ERASE_SIZE; 206 | assert(_uf2_info.cleared_pages); 207 | assert(page_no < _uf2_info.max_cleared_pages); 208 | uint page_offset = page_no / 32; 209 | uint32_t page_mask = 1u << (page_no & 31u); 210 | assert(page_offset <= _uf2_info.max_cleared_pages); 211 | if (!(_uf2_info.cleared_pages[page_offset] & page_mask)) { 212 | _uf2_info.next_task.erase_addr = _uf2_info.next_task.transfer_addr & ~(FLASH_SECTOR_ERASE_SIZE - 1u); 213 | _uf2_info.next_task.erase_size = FLASH_SECTOR_ERASE_SIZE; // always erase a single sector 214 | usb_debug("Setting erase addr %08x\n", (uint) _uf2_info.next_task.erase_addr); 215 | _uf2_info.cleared_pages[page_offset] |= page_mask; 216 | _uf2_info.next_task.type |= AT_FLASH_ERASE; 217 | } 218 | usb_debug("Have flash destined page %08x (%08x %08x)\n", (uint) _uf2_info.next_task.transfer_addr, 219 | (uint) *(uint32_t *) _uf2_info.next_task.data, 220 | (uint) *(uint32_t *) (_uf2_info.next_task.data + 4)); 221 | assert(!(_uf2_info.next_task.transfer_addr & 0xffu)); 222 | } 223 | _uf2_info.valid_block_count++; 224 | 
_uf2_info.valid_blocks[block_offset] |= block_mask; 225 | usb_warn("Queuing 0x%08x->0x%08x valid %d/%d checked %d/%d\n", (uint) 226 | (uint) _uf2_info.next_task.transfer_addr, (uint) (_uf2_info.next_task.transfer_addr + FLASH_PAGE_SIZE), 227 | (uint) _uf2_info.block_no + 1u, (uint) _uf2_info.num_blocks, (uint) _uf2_info.valid_block_count, 228 | (uint) _uf2_info.num_blocks); 229 | queue_task(&virtual_disk_queue, &_uf2_info.next_task, _write_uf2_page_complete); 230 | // after the first write (i.e. next time, we want to check the source) 231 | _uf2_info.next_task.check_last_mutation_source = true; 232 | // note that queue_task may actually be handled sychronously based on #define, however that is OK 233 | // because it still calls _write_uf2_page_complete which still calls vd_async_complete which is allowed even in non async. 234 | return true; 235 | } else { 236 | assert(_uf2_info.next_task.type); // we should not have had any valid blocks after reset... we must take the above path so that the task gets executed 237 | uf2_debug("Ignore duplicate write to 0x%08x->0x%08x\n", 238 | (uint) _uf2_info.next_task.transfer_addr, 239 | (uint) (_uf2_info.next_task.transfer_addr + FLASH_PAGE_SIZE)); 240 | } 241 | return false; // not async 242 | } 243 | 244 | void vd_init() { 245 | } 246 | 247 | void vd_reset() { 248 | usb_debug("Resetting virtual disk\n"); 249 | _uf2_info.num_blocks = 0; // marker that uf2_info is invalid 250 | } 251 | 252 | // note caller must pass SECTOR_SIZE buffer 253 | void init_dir_entry(struct dir_entry *entry, const char *fn, uint cluster, uint len) { 254 | entry->creation_time_frac = RASPBERRY_PI_TIME_FRAC; 255 | entry->creation_time = RASPBERRY_PI_TIME; 256 | entry->creation_date = RASPBERRY_PI_DATE; 257 | entry->last_modified_time = RASPBERRY_PI_TIME; 258 | entry->last_modified_date = RASPBERRY_PI_DATE; 259 | memcpy(entry->name, fn, 11); 260 | entry->attr = ATTR_READONLY | ATTR_ARCHIVE; 261 | entry->cluster_lo = cluster; 262 | entry->size = len; 263 
| } 264 | 265 | bool vd_read_block(__unused uint32_t token, uint32_t lba, uint8_t *buf __comma_removed_for_space(uint32_t buf_size)) { 266 | assert(buf_size >= SECTOR_SIZE); 267 | memset0(buf, SECTOR_SIZE); 268 | #ifndef NO_PARTITION_TABLE 269 | if (!lba) { 270 | uint8_t *ptable = buf + SECTOR_SIZE - 2 - 64; 271 | 272 | #if 0 273 | // simple LBA partition at sector 1 274 | ptable[4] = PT_FAT16_LBA; 275 | // 08 LSB start sector 276 | ptable[8] = 1; 277 | // 12 LSB sector count 278 | ptable[12] = (SECTOR_COUNT-1) & 0xffu; 279 | ptable[13] = ((SECTOR_COUNT-1)>>8u) & 0xffu; 280 | ptable[14] = ((SECTOR_COUNT-1)>>16u) & 0xffu; 281 | static_assert(!(SECTOR_COUNT>>24u), ""); 282 | #else 283 | static_assert(!((SECTOR_COUNT - 1u) >> 24), ""); 284 | static const uint8_t _ptable_data4[] = { 285 | PT_FAT16_LBA, 0, 0, 0, 286 | lsb_word(1), // sector 1 287 | // sector count, but we know the MS byte is zero 288 | (SECTOR_COUNT - 1u) & 0xffu, 289 | ((SECTOR_COUNT - 1u) >> 8u) & 0xffu, 290 | ((SECTOR_COUNT - 1u) >> 16u) & 0xffu, 291 | }; 292 | memcpy(ptable + 4, _ptable_data4, sizeof(_ptable_data4)); 293 | #endif 294 | ptable[64] = 0x55; 295 | ptable[65] = 0xaa; 296 | 297 | uint32_t sn = msc_get_serial_number32(); 298 | memcpy(buf + MBR_OFFSET_SERIAL_NUMBER, &sn, 4); 299 | return false; 300 | } 301 | lba--; 302 | #endif 303 | if (!lba) { 304 | uint32_t sn = msc_get_serial_number32(); 305 | memcpy(buf, boot_sector, sizeof(boot_sector)); 306 | memcpy(buf + BOOT_OFFSET_SERIAL_NUMBER, &sn, 4); 307 | } else { 308 | lba--; 309 | if (lba < SECTORS_PER_FAT * FAT_COUNT) { 310 | // mirror 311 | while (lba >= SECTORS_PER_FAT) lba -= SECTORS_PER_FAT; 312 | if (!lba) { 313 | uint16_t *p = (uint16_t *) buf; 314 | p[0] = 0xff00u | MEDIA_TYPE; 315 | p[1] = 0xffff; 316 | p[2] = 0xffff; // cluster2 is index.htm 317 | #ifdef USE_INFO_UF2 318 | p[3] = 0xffff; // cluster3 is info_uf2.txt 319 | #endif 320 | } 321 | } else { 322 | lba -= SECTORS_PER_FAT * FAT_COUNT; 323 | if (lba < ROOT_DIRECTORY_SECTORS) 
{ 324 | // we don't support that many directory entries actually 325 | if (!lba) { 326 | // root directory 327 | struct dir_entry *entries = (struct dir_entry *) buf; 328 | memcpy(entries[0].name, (boot_sector + BOOT_OFFSET_LABEL), 11); 329 | entries[0].attr = ATTR_VOLUME_LABEL | ATTR_ARCHIVE; 330 | init_dir_entry(++entries, "INDEX HTM", 2, welcome_html_len); 331 | #ifdef USE_INFO_UF2 332 | init_dir_entry(++entries, "INFO_UF2TXT", 3, info_uf2_txt_len); 333 | #endif 334 | } 335 | } else { 336 | lba -= ROOT_DIRECTORY_SECTORS; 337 | uint cluster = lba >> CLUSTER_SHIFT; 338 | uint cluster_offset = lba - (cluster << CLUSTER_SHIFT); 339 | if (!cluster_offset) { 340 | if (cluster == 0) { 341 | #ifndef COMPRESS_TEXT 342 | memcpy(buf, welcome_html, welcome_html_len); 343 | #else 344 | poor_mans_text_decompress(welcome_html_z + sizeof(welcome_html_z), sizeof(welcome_html_z), buf); 345 | memcpy(buf + welcome_html_version_offset_1, serial_number_string, 12); 346 | memcpy(buf + welcome_html_version_offset_2, serial_number_string, 12); 347 | #endif 348 | } 349 | #ifdef USE_INFO_UF2 350 | else if (cluster == 1) { 351 | // spec suggests we have this as raw text in the binary, although it doesn't much matter if no CURRENT.UF2 file 352 | // note that this text doesn't compress anyway, so do this raw anyway 353 | memcpy(buf, info_uf2_txt, info_uf2_txt_len); 354 | } 355 | #endif 356 | } 357 | } 358 | } 359 | } 360 | return false; 361 | } 362 | 363 | #define FLASH_MAX_VALID_BLOCKS ((FLASH_BITMAPS_SIZE * 8LL * FLASH_SECTOR_ERASE_SIZE / (FLASH_PAGE_SIZE + FLASH_SECTOR_ERASE_SIZE)) & ~31u) 364 | #define FLASH_CLEARED_PAGES_BASE (FLASH_VALID_BLOCKS_BASE + FLASH_MAX_VALID_BLOCKS / 8) 365 | static_assert(!(FLASH_CLEARED_PAGES_BASE & 0x3), ""); 366 | #define FLASH_MAX_CLEARED_PAGES (FLASH_MAX_VALID_BLOCKS * FLASH_PAGE_SIZE / FLASH_SECTOR_ERASE_SIZE) 367 | static_assert(FLASH_CLEARED_PAGES_BASE + (FLASH_MAX_CLEARED_PAGES / 32 - FLASH_VALID_BLOCKS_BASE <= FLASH_BITMAPS_SIZE), 368 | ""); 369 | 
370 | static void _clear_bitset(uint32_t *mask, uint32_t count) { 371 | memset0(mask, count / 8); 372 | } 373 | 374 | static bool _update_current_uf2_info(struct uf2_block *uf2, uint32_t token) { 375 | bool ram = is_address_ram(uf2->target_addr) && is_address_ram(uf2->target_addr + (FLASH_PAGE_MASK)); 376 | bool flash = is_address_flash(uf2->target_addr) && is_address_flash(uf2->target_addr + (FLASH_PAGE_MASK)); 377 | if (!(uf2->num_blocks && (ram || flash)) || (flash && (uf2->target_addr & (FLASH_PAGE_MASK)))) { 378 | uf2_debug("Resetting active UF2 transfer because received garbage\n"); 379 | } else if (!virtual_disk_queue.disable) { 380 | // note (test abive) if virtual disk queue is disabled (and note since we're in IRQ that cannot change whilst we are executing), 381 | // then we don't want to do any of this even if the task will be ignored later (doing this would modify our state) 382 | uint8_t type = AT_WRITE; // we always write 383 | if (_uf2_info.num_blocks != uf2->num_blocks) { 384 | // todo we may be able to skip some of these checks and let the task handle it (it will ignore garbage addresses for example) 385 | uf2_debug("Resetting active UF2 transfer because have new binary size %d->%d\n", (int) _uf2_info.num_blocks, 386 | (int) uf2->num_blocks); 387 | memset0(&_uf2_info, sizeof(_uf2_info)); 388 | _uf2_info.ram = ram; 389 | _uf2_info.valid_blocks = ram ? uf2_valid_ram_blocks : (uint32_t *) FLASH_VALID_BLOCKS_BASE; 390 | _uf2_info.max_valid_blocks = ram ? 
count_of(uf2_valid_ram_blocks) * 32 : FLASH_MAX_VALID_BLOCKS; 391 | uf2_debug(" ram %d, so valid_blocks (max %d) %p->%p for %dK\n", ram, (int) _uf2_info.max_valid_blocks, 392 | _uf2_info.valid_blocks, _uf2_info.valid_blocks + ((_uf2_info.max_valid_blocks + 31) / 32), 393 | (uint) _uf2_info.max_valid_blocks / 4); 394 | _clear_bitset(_uf2_info.valid_blocks, _uf2_info.max_valid_blocks); 395 | if (flash) { 396 | _uf2_info.cleared_pages = (uint32_t *) FLASH_CLEARED_PAGES_BASE; 397 | _uf2_info.max_cleared_pages = FLASH_MAX_CLEARED_PAGES; 398 | uf2_debug(" cleared_pages %p->%p\n", _uf2_info.cleared_pages, 399 | _uf2_info.cleared_pages + ((_uf2_info.max_cleared_pages + 31) / 32)); 400 | _clear_bitset(_uf2_info.cleared_pages, _uf2_info.max_cleared_pages); 401 | } 402 | 403 | if (uf2->num_blocks > _uf2_info.max_valid_blocks) { 404 | uf2_debug("Oops image requires %d blocks and won't fit", (uint) uf2->num_blocks); 405 | return false; 406 | } 407 | usb_warn("New UF2 transfer\n"); 408 | _uf2_info.num_blocks = uf2->num_blocks; 409 | _uf2_info.valid_block_count = 0; 410 | _uf2_info.lowest_addr = 0xffffffff; 411 | if (flash) type |= AT_EXIT_XIP; 412 | } 413 | if (ram != _uf2_info.ram) { 414 | uf2_debug("Ignoring write to out of range address 0x%08x->0x%08x\n", 415 | (uint) uf2->target_addr, (uint) (uf2->target_addr + uf2->payload_size)); 416 | } else { 417 | assert(uf2->num_blocks <= _uf2_info.max_valid_blocks); 418 | if (uf2->block_no < uf2->num_blocks) { 419 | // set up next task state (also serves as a holder for state scoped to this block write to avoid copying data around) 420 | reset_task(&_uf2_info.next_task); 421 | _uf2_info.block_no = uf2->block_no; 422 | _uf2_info.token = _uf2_info.next_task.token = token; 423 | _uf2_info.next_task.transfer_addr = uf2->target_addr; 424 | _uf2_info.next_task.type = type; 425 | _uf2_info.next_task.data = uf2->data; 426 | _uf2_info.next_task.callback = _write_uf2_page_complete; 427 | _uf2_info.next_task.data_length = FLASH_PAGE_SIZE; // 
always a full page 428 | _uf2_info.next_task.source = TASK_SOURCE_VIRTUAL_DISK; 429 | return true; 430 | } else { 431 | uf2_debug("Ignoring write to out of range block %d >= %d\n", (int) uf2->block_no, 432 | (int) uf2->num_blocks); 433 | } 434 | } 435 | } 436 | _uf2_info.num_blocks = 0; // invalid 437 | return false; 438 | } 439 | 440 | // note caller must pass SECTOR_SIZE buffer 441 | bool vd_write_block(uint32_t token, __unused uint32_t lba, uint8_t *buf __comma_removed_for_space(uint32_t buf_size)) { 442 | struct uf2_block *uf2 = (struct uf2_block *) buf; 443 | if (uf2->magic_start0 == UF2_MAGIC_START0 && uf2->magic_start1 == UF2_MAGIC_START1 && 444 | uf2->magic_end == UF2_MAGIC_END) { 445 | if (uf2->flags & UF2_FLAG_FAMILY_ID_PRESENT && uf2->file_size == RP2040_FAMILY_ID && 446 | !(uf2->flags & UF2_FLAG_NOT_MAIN_FLASH) && uf2->payload_size == 256) { 447 | if (_update_current_uf2_info(uf2, token)) { 448 | // if we have a valid uf2 page, write it 449 | return _write_uf2_page(); 450 | } 451 | } else { 452 | uf2_debug("Sector %d: ignoring write of non Mu UF2 sector\n", (uint) lba); 453 | } 454 | } else { 455 | uf2_debug("Sector %d: ignoring write of non UF2 sector\n", (uint) lba); 456 | } 457 | return false; 458 | } -------------------------------------------------------------------------------- /bootrom/welcome.html: -------------------------------------------------------------------------------- 1 | Redirecting to raspberrypi.com -------------------------------------------------------------------------------- /generator/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | PROJECT(generate C) 2 | 3 | add_executable(generate main.c) 4 | -------------------------------------------------------------------------------- /generator/main.c: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2020 Raspberry Pi (Trading) Ltd. 
3 | * 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | */ 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include "../usb_device_tiny/scsi_ir.h" 15 | 16 | #ifndef MIN 17 | #define MIN(a,b) ((a)<(b)?(a):(b)) 18 | #endif 19 | 20 | #ifndef MAX 21 | #define MAX(a,b) ((a)>(b)?(a):(b)) 22 | #endif 23 | 24 | typedef unsigned int uint; 25 | 26 | static int decompress(const uint8_t *data, int data_size, uint8_t *buf, int buf_size); 27 | static void dump(const char *name, char *suffix, const uint8_t *buf, size_t n); 28 | 29 | // NOTE: This is a VERY simple and HIGHLY specialized compression format. 30 | // It can compress a string of bytes (which contain only 0 <= c <= 127)... 31 | // 32 | // The goals were simplicity/size of decode function and the ability to 33 | // handle repeated strings in the welcome.html 34 | int compress(const uint8_t *src, int len, const char *name) { 35 | static uint8_t buf[4096]; 36 | int cost = len; 37 | int n = 0; 38 | for (int i = 0; i < len; i++) { 39 | if (src[i] >= 0x80) { 40 | fprintf(stderr, "Input data may not have top bit set"); 41 | return 1; 42 | } 43 | } 44 | for (int to = 0; to < len; to++) { 45 | bool found = false; 46 | for (int match_len = MIN(127, len - to); match_len >= 3 && !found; match_len--) { 47 | for (int from = MAX(0, to - 255); from < to && !found; from++) { 48 | if (!memcmp(src + to, src + from, match_len)) { 49 | assert(from < to); 50 | // printf("Match %d from %d+%d\n", i, k, match_len); 51 | // todo look for other matches? 
52 | cost += 2; 53 | cost -= match_len; 54 | buf[n++] = 0x100 - match_len; 55 | buf[n++] = to - from; 56 | to += match_len; 57 | found = true; 58 | } 59 | } 60 | } 61 | if (to != len) 62 | buf[n++] = src[to]; 63 | } 64 | static uint8_t check_buf[4096]; 65 | int check_n = decompress(buf, n, check_buf, sizeof(check_buf)); 66 | if (check_n != len || memcmp(src, check_buf, len)) { 67 | fprintf(stderr, "Decompress check failed\n"); 68 | dump("expected", "", src, len); 69 | dump("actual", "", check_buf, check_n); 70 | return 1; 71 | } 72 | 73 | printf("#ifdef COMPRESS_TEXT\n"); 74 | printf("// %s:\n", name); 75 | printf("// %d/%d %d %d\n", cost, len, len - cost, n); 76 | dump(name, "_z", buf, n); 77 | printf("#define %s_len %d\n", name, len); 78 | printf("#endif\n\n"); 79 | return 0; 80 | } 81 | 82 | int decompress(const uint8_t *data, int data_size, uint8_t *buf, int buf_size) { 83 | int n = 0; 84 | for (int i = 0; i < data_size && n < buf_size; i++) { 85 | uint8_t b = data[i]; 86 | if (b < 0x80u) { 87 | buf[n++] = b; 88 | } else { 89 | int len = 0x100 - b; 90 | int off = n - data[++i]; 91 | while (len--) { 92 | buf[n++] = buf[off++]; 93 | } 94 | } 95 | } 96 | if (n == buf_size) n = 0; 97 | return n; 98 | } 99 | 100 | void dump(const char *name, char *suffix, const uint8_t *buf, size_t n) { 101 | printf("static const uint8_t %s%s[] = {\n", name, suffix); 102 | for (size_t i = 0; i < n; i += 12) { 103 | printf(" "); 104 | for (size_t j = i; j < MIN(n, i + 12); j++) { 105 | printf("0x%02x, ", buf[j]); 106 | } 107 | printf("\n"); 108 | } 109 | printf("};\n"); 110 | printf("#define %s%s_len %ld\n", name, suffix, n); 111 | } 112 | 113 | static char filename[FILENAME_MAX]; 114 | 115 | #if defined(WIN32) || defined(_WIN32) 116 | #define PATH_SEPARATOR "\\" 117 | #else 118 | #define PATH_SEPARATOR "/" 119 | #endif 120 | 121 | void replace(char *str, char from, char to) { 122 | for (char *p = strchr(str, from); p; p = strchr(p, from)) *p = to; 123 | } 124 | 125 | int 
read_fully_and_write_header(char *path, char *fn, uint8_t *buf, uint buf_size, size_t *size_out, bool ifdef) { 126 | int rc = 0; 127 | snprintf(filename, sizeof(filename), "%s%s%s", path, PATH_SEPARATOR, fn); 128 | FILE *in = fopen(filename, "rb"); 129 | if (!in) { 130 | fprintf(stderr, "Could not open '%s'\n", filename); 131 | rc = 1; 132 | } 133 | size_t size = 0; 134 | if (!rc) { 135 | fseek(in, 0, SEEK_END); 136 | size = ftell(in); 137 | fseek(in, 0, SEEK_SET); 138 | if (size > buf_size || 1 != fread(buf, size, 1, in)) { 139 | fprintf(stderr, "Could not read '%s' size %ld\n", filename, size); 140 | rc = 1; 141 | } else { 142 | if (ifdef) printf("#ifndef COMPRESS_TEXT\n"); 143 | strcpy(filename, fn); 144 | replace(filename, '.', '_'); 145 | dump(filename, "", buf, size); 146 | if (ifdef) printf("#endif\n"); 147 | printf("\n"); 148 | } 149 | } 150 | if (in) fclose(in); 151 | *size_out = size; 152 | return rc; 153 | } 154 | 155 | int main(int argc, char **argv) { 156 | int rc = 0; 157 | struct stat sb; 158 | if (argc != 2 || stat(argv[1], &sb) || !(sb.st_mode & S_IFDIR)) { 159 | fprintf(stderr, "expected valid source path argument."); 160 | rc = 1; 161 | } 162 | 163 | static uint8_t buf[4096]; 164 | 165 | if (!rc) { 166 | size_t welcome_html_len; 167 | rc = read_fully_and_write_header(argv[1], "welcome.html", buf, sizeof(buf), &welcome_html_len, true); 168 | if (!rc) { 169 | rc = compress(buf, welcome_html_len, "welcome_html"); 170 | } 171 | if (!rc) { 172 | for (uint i = 0; i < welcome_html_len - 12; i++) { 173 | if (!memcmp(buf + i, "aabbccddeeff", 12)) { 174 | static int x; 175 | printf("#define welcome_html_version_offset_%d %d\n", ++x, i); 176 | } 177 | } 178 | printf("\n"); 179 | } 180 | } 181 | 182 | if (!rc) { 183 | size_t info_uf2_len; 184 | rc = read_fully_and_write_header(argv[1], "info_uf2.txt", buf, sizeof(buf), &info_uf2_len, false); 185 | } 186 | 187 | if (!rc) { 188 | struct scsi_inquiry_response copy = scsi_ir; 189 | copy.rmb = 0x00, // should be 
0x80 but we can't compress that so we use 0 and fill it later 190 | rc = compress((uint8_t *) ©, sizeof(copy), "scsi_ir"); 191 | } 192 | 193 | return 0; 194 | } -------------------------------------------------------------------------------- /test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(bit_functions_test 2 | bit_functions_test.c 3 | ../bootrom/bit_functions.S) 4 | 5 | target_compile_definitions(bit_functions_test PRIVATE 6 | NEW_BIT_FUNCTIONS=1 7 | USE_POPCOUNT32 8 | USE_CLZ32 9 | USE_CTZ32 10 | USE_REVERSE32) 11 | 12 | target_link_libraries(bit_functions_test PRIVATE pico_stdlib) 13 | pico_add_extra_outputs(bit_functions_test) 14 | 15 | add_executable(mem_functions_test 16 | mem_functions_test.c 17 | ../bootrom/bootrom_misc.S) 18 | 19 | target_link_libraries(mem_functions_test PRIVATE pico_stdlib) 20 | pico_add_extra_outputs(mem_functions_test) 21 | 22 | add_executable(tc_rom_float tc_rom_float.c) 23 | add_executable(tc_rom_double tc_rom_double.c) 24 | 25 | # don't redirect C floating point code thru the bootrom! 26 | pico_set_float_implementation(tc_rom_float compiler) 27 | pico_set_float_implementation(tc_rom_double compiler) 28 | pico_set_double_implementation(tc_rom_float compiler) 29 | pico_set_double_implementation(tc_rom_double compiler) 30 | 31 | target_link_libraries(tc_rom_float PRIVATE pico_stdlib) 32 | target_link_libraries(tc_rom_double PRIVATE pico_stdlib) 33 | 34 | pico_add_extra_outputs(tc_rom_float) 35 | pico_add_extra_outputs(tc_rom_double) 36 | -------------------------------------------------------------------------------- /test/bit_functions_test.c: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2020 Raspberry Pi (Trading) Ltd. 
3 | * 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | */ 6 | 7 | #include 8 | #include 9 | #include "pico/stdlib.h" 10 | #include "pico/bootrom.h" 11 | #include "tictoc.h" 12 | 13 | extern uint32_t popcount32(uint32_t v); 14 | extern uint32_t reverse32(uint32_t v); 15 | extern uint32_t clz32(uint32_t v); 16 | extern uint32_t ctz32(uint32_t v); 17 | 18 | #define ASSERT(x) if (!(x)) { panic("ASSERT: %s l %d: " #x "\n" , __FILE__, __LINE__); } 19 | 20 | static bool roughly_ascii(uint x) { 21 | return x >= 32 && x < 127; 22 | } 23 | 24 | static int check_table(uint16_t *table, uint align_mask, uint align_value) { 25 | static uint16_t entries[256]; 26 | uint32_t (*table_lookup)(uint16_t *, uint32_t) = (uint32_t (*)(uint16_t *, uint32_t))(uint32_t)(*(uint16_t *)0x18); 27 | int n = 0; 28 | while (*table && n != count_of(entries)) { 29 | printf("Checking %c%c %d %d\n", table[0], table[0]>>8u, align_mask, align_value); 30 | for(int i=0; i> 8u)); 35 | if ((table[0] & 0xffu) != 'F' || (table[0] >> 8u) != 'Z') // this is byte aligned, so no check 36 | ASSERT(align_value == (table[1] & align_mask)); // check alignment 37 | entries[n++] = table[0]; 38 | ASSERT(table_lookup(table, table[0]) == table[1]); // check that looking up the value works correctly 39 | table += 2; 40 | } 41 | ASSERT(n); // table should not be empty 42 | ASSERT(n != count_of(entries)); // table should terminate relatively quickly 43 | ASSERT(0 == table_lookup(table, rom_table_code('Z','Z'))); 44 | return 0; 45 | } 46 | 47 | // slightly slower than ours, but known to be good 48 | static uint32_t __noinline bit_reverse(uint32_t i) { 49 | i = ((i & 0x55555555u) << 1u) | ((i >> 1u) & 0x55555555u); 50 | i = ((i & 0x33333333u) << 2u) | ((i >> 2u) & 0x33333333u); 51 | i = ((i & 0x0f0f0f0fu) << 4u) | ((i >> 4u) & 0x0f0f0f0fu); 52 | return __bswap32(i); 53 | } 54 | 55 | void check_popcount32(uint32_t i, uint32_t o) { 56 | ASSERT(o == __builtin_popcount(i)); 57 | } 58 | 59 | void check_reverse32(uint32_t i, uint32_t 
o) { 60 | ASSERT(o == bit_reverse(i)); 61 | } 62 | 63 | void check_clz32(uint32_t i, uint32_t o) { 64 | if (i && o != __builtin_clz(i)) { 65 | panic("INPUT %08x EXPECTED %08x GOT %08x\n", i, __builtin_clz(i), o); 66 | } 67 | } 68 | 69 | void check_ctz32(uint32_t i, uint32_t o) { 70 | if (i && o != __builtin_ctz(i)) { 71 | panic("INPUT %08x EXPECTED %08x GOT %08x\n", i, __builtin_clz(i), o); 72 | } 73 | } 74 | 75 | static int check_bit_function_internal(uint32_t i, uint32_t (*test_fn)(uint32_t), void (*check_fn)(uint32_t, uint32_t)) 76 | { 77 | uint32_t v = test_fn(i); 78 | if (check_fn) { 79 | check_fn(i, v); 80 | } 81 | return 0; 82 | } 83 | 84 | static int check_bit_function(uint32_t i, uint32_t (*test_fn)(uint32_t), void (*check_fn)(uint32_t, uint32_t), bool bitreverse_input) { 85 | check_bit_function_internal(i, test_fn, check_fn); 86 | check_bit_function_internal(i ^ 0xffffffffu, test_fn, check_fn); 87 | if (bitreverse_input) { 88 | uint32_t rev = bit_reverse(i); 89 | check_bit_function_internal(rev, test_fn, check_fn); 90 | check_bit_function_internal(rev ^ 0xffffffffu, test_fn, check_fn); 91 | } 92 | return 0; 93 | } 94 | 95 | __attribute__((naked)) uint32_t empty_fn() { 96 | asm ("bx lr"); 97 | } 98 | 99 | // note this is not quite exhaustive; it does not test input 0 100 | #define run_exhaustive(name, func, alternate_func) \ 101 | { \ 102 | printf("Exhaustive %s...\n", name); \ 103 | absolute_time_t t = get_absolute_time(); \ 104 | for(uint32_t i=0xffffffff; i>0; i--) { \ 105 | if (func(i) != alternate_func(i)) { \ 106 | panic("%s failed at %08x\n", name, i); \ 107 | } \ 108 | if (!(i&0xfffff)) { \ 109 | int64_t elapsed = time_diff(t, get_absolute_time()); \ 110 | int64_t expected = 4096 * elapsed / (4096 - (i>>20)); \ 111 | int32_t remaining_secs = (expected - elapsed) / 1000000; \ 112 | printf("\r%d %ds ", (i>>20), remaining_secs); \ 113 | } \ 114 | } \ 115 | printf("\n"); \ 116 | } 117 | 118 | uint32_t __42(uint32_t x) { 119 | return 42; 120 | } 121 | 
122 | int __time_critical_func(main)() 123 | { 124 | setup_default_uart(); 125 | 126 | srand(0xf005ba11); 127 | 128 | uint16_t *func_table = (uint16_t *)(uint32_t)*(uint16_t *) 0x14; 129 | uint16_t *data_table = (uint16_t *)(uint32_t)*(uint16_t *) 0x16; 130 | 131 | check_table(func_table, 1, 1); // odd values only 132 | // todo this is not true - we should check they are naturally aligned 133 | // convention is for these to be word aligned 134 | check_table(data_table, 3, 0); 135 | 136 | extern uint32_t __clzsi2(uint32_t); 137 | extern uint32_t __ctzsi2(uint32_t); 138 | extern uint32_t __popcountsi2(uint32_t); 139 | static struct { 140 | const char *name; 141 | uint32_t (*func)(uint32_t); 142 | void (*check)(uint32_t, uint32_t); 143 | } tests[] = { 144 | { "empty", empty_fn, NULL, }, 145 | { "popcount32", popcount32, check_popcount32, }, 146 | { "reverse32", reverse32, check_reverse32, }, 147 | { "clz32", clz32, check_clz32, }, 148 | { "ctz32", ctz32, check_ctz32, }, 149 | { "__clzsi2", __clzsi2, NULL, }, 150 | { "__clzsi2", __ctzsi2, NULL, }, 151 | { "__popcountsi2", __popcountsi2, NULL, }, 152 | }; 153 | int n_checks = 1 << 16; 154 | for (int t = 1; t<5;t++) { 155 | printf("Testing %s...\n", tests[t].name); 156 | for(int i = 0; i <= n_checks; i++) { 157 | check_bit_function(i, tests[t].func, tests[t].check, true); 158 | check_bit_function(-i, tests[t].func, tests[t].check, true); 159 | check_bit_function((uint) rand(), tests[t].func, tests[t].check, true); 160 | } 161 | } 162 | 163 | ASSERT(clz32(0) == 32); 164 | ASSERT(clz32(-1) == 0); 165 | ASSERT(ctz32(0) == 32); 166 | ASSERT(ctz32(-1) == 0); 167 | 168 | tictoc_init(); 169 | // timing 170 | n_checks = 1 << 13; // about the max we can test before rollover 171 | uint32_t *rands = (uint32_t *)calloc(n_checks, sizeof(uint32_t)); 172 | for(int i=0;i>8u; 187 | double clocks = delta / (6.0 * n_checks); 188 | printf("%s %2.2g cycles\n", tests[t].name, clocks); 189 | } 190 | free(rands); 191 | 192 | #if 0 193 | 
run_exhaustive("clz32", clz32, __clzsi2); 194 | run_exhaustive("ctz32", ctz32, __ctzsi2); 195 | run_exhaustive("popcount32", popcount32, __popcountsi2); 196 | run_exhaustive("reverse32", reverse32, bit_reverse); 197 | #endif 198 | printf("Done\n"); 199 | } 200 | -------------------------------------------------------------------------------- /test/mem_functions_test.c: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2020 Raspberry Pi (Trading) Ltd. 3 | * 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | */ 6 | 7 | #include 8 | #include 9 | #include "pico/stdlib.h" 10 | #include "tictoc.h" 11 | 12 | #define ASSERT(x) if (!(x)) { panic("ASSERT: %s l %d: " #x "\n" , __FILE__, __LINE__); } 13 | 14 | typedef void *(*memcpy_func)(void *a, void *b, uint len); 15 | memcpy_func memcpy_general, memcpy_44; 16 | 17 | __attribute__((naked)) void *memcpy_slow(void *a, void *b, uint c) { 18 | asm ( 19 | "cmp r2, #0\n" 20 | "beq 1f\n" 21 | "mov ip, r0\n" 22 | "2:\n" 23 | "sub r2, #1\n" 24 | "ldrb r3, [r1, r2]\n" 25 | "strb r3, [r0, r2]\n" 26 | "bne 2b\n" 27 | "mov r0, ip\n" 28 | "1:\n" 29 | "bx lr\n" 30 | ); 31 | } 32 | 33 | __attribute__((naked)) void *memset_slow(void *a, int v, uint c) { 34 | asm( 35 | "cmp r2, #0\n" 36 | "beq 1f\n" 37 | "mov ip, r0\n" 38 | "2: \n" 39 | "sub r2, #1\n" 40 | "strb r1, [r0, r2] \n" 41 | "bne 2b\n" 42 | "mov r0, ip\n" 43 | "1:\n" 44 | "bx lr \n" 45 | ); 46 | } 47 | 48 | 49 | #define TIC t1=cyc(); 50 | #define TOC(x) t1=cyc()-t1; x=t1>>8u; x-=3; // timing overhead 51 | 52 | void * __noinline tictoc_memcpy_slow(uint8_t *a, uint8_t *b, uint c, uint32_t *t) { 53 | uint t1 = 0; 54 | TIC; 55 | void *rc = memcpy_slow(a, b, c); 56 | TOC(*t); 57 | return rc; 58 | } 59 | 60 | void * __noinline tictoc_memcpy_general(uint8_t *a, uint8_t *b, uint c, uint32_t *t) { 61 | uint t1 = 0; 62 | TIC; 63 | void *rc = memcpy_general(a, b, c); 64 | TOC(*t); 65 | return rc; 66 | } 67 | 68 | void * __noinline 
tictoc_memcpy(uint8_t *a, uint8_t *b, uint c, uint32_t *t) { 69 | uint t1 = 0; 70 | TIC; 71 | void *rc = memcpy(a, b, c); 72 | TOC(*t); 73 | return rc; 74 | } 75 | 76 | static inline uint64_t rotl(const uint64_t x, int k) { 77 | return (x << k) | (x >> (64 - k)); 78 | } 79 | 80 | static uint64_t s[4] = {0x5a5a5a5a5a5a5a5au, 0xf005ba11deadbeefu, 0xb007c0d3caf3bab3u, 0x0123456789abcdefu}; 81 | 82 | uint64_t xrand(void) { 83 | const uint64_t result = rotl(s[0] + s[3], 23) + s[0]; 84 | 85 | const uint64_t t = s[1] << 17; 86 | 87 | s[2] ^= s[0]; 88 | s[3] ^= s[1]; 89 | s[1] ^= s[2]; 90 | s[0] ^= s[3]; 91 | 92 | s[2] ^= t; 93 | 94 | s[3] = rotl(s[3], 45); 95 | 96 | return result; 97 | } 98 | 99 | void *__memcpy(void *a, void *b, uint len); 100 | void *__memcpy_44(void *a, void *b, uint len); 101 | 102 | static int check_memcpy() { 103 | printf("------------------- MEMCPY ---------------------\n"); 104 | int ret = 0; 105 | memcpy_general = __memcpy; 106 | memcpy_44 = __memcpy_44; 107 | if (!memcpy_general || !memcpy_44) { 108 | printf("Doh\n"); 109 | ret = 1; 110 | } 111 | 112 | static uint8_t source[1024]; 113 | static uint8_t desta[1024]; 114 | static uint8_t destb[1024]; 115 | static uint8_t destc[1024]; 116 | for(int len = 0 ; !ret && len < 1020; len++) { 117 | for(int i = 0; i+%d len = %d (%p/%p/%d)\n", v, src_off, dst_off, len, a, b, x); 149 | mc(destb + dst_off, source + src_off, len); // for debugging 150 | ret = 1; 151 | } 152 | } 153 | } 154 | } 155 | } 156 | tictoc_init(); 157 | 158 | for(int len = 0 ; !ret && len < 128; len++) 159 | { 160 | for(int src_off = 0; !ret && src_off < 4; src_off++) 161 | { 162 | for(int dst_off = 0; !ret && dst_off < 4; dst_off++) 163 | { 164 | uint32_t ta, tb, tc; 165 | void *a = tictoc_memcpy_slow(desta + dst_off, source + src_off, len, &ta); 166 | void *b = tictoc_memcpy_general(destb + dst_off, source + src_off, len, &tb); 167 | __unused void *c = tictoc_memcpy(destc + dst_off, source + src_off, len, &tc); 168 | b += desta - 
destb; 169 | uint x = memcmp(desta, destb, 1024); 170 | if (a != b || x) 171 | { 172 | printf("Failed +%d->+%d len = %d (%p/%p/%d)\n", src_off, dst_off, len, a, b, x); 173 | ret = 1; 174 | } else { 175 | printf("+%d->+%d len = %d\t%d\t%d\t%d\n", src_off, dst_off, len, (int)ta, (int)tb, (int)tc); 176 | } 177 | } 178 | } 179 | } 180 | return ret; 181 | } 182 | 183 | typedef void *(*memset_func)(void *a, int c, uint len); 184 | memset_func memset_general, memset_4; 185 | 186 | void *__memset(void *a, int c, uint len); 187 | void *__memset_4(void *a, int c, uint len); 188 | 189 | void * __noinline tictoc_memset_slow(uint8_t *a, int b, uint c, uint32_t *t) { 190 | uint t1 = 0; 191 | TIC; 192 | void *rc = memset_slow(a, b, c); 193 | TOC(*t); 194 | return rc; 195 | } 196 | 197 | void * __noinline tictoc_memset_general(uint8_t *a, int b, uint c, uint32_t *t) { 198 | uint t1 = 0; 199 | TIC; 200 | void *rc = memset_general(a, b, c); 201 | TOC(*t); 202 | return rc; 203 | } 204 | 205 | void * __noinline tictoc_memset(uint8_t *a, int b, uint c, uint32_t *t) { 206 | uint t1 = 0; 207 | TIC; 208 | void *rc = memset(a, b, c); 209 | TOC(*t); 210 | return rc; 211 | } 212 | 213 | static int check_memset() { 214 | printf("------------------- MEMSET ---------------------\n"); 215 | int ret = 0; 216 | memset_general = __memset; 217 | memset_4 = __memset_4; 218 | if (!memset_general || !memset_4) { 219 | printf("Doh\n"); 220 | ret = 1; 221 | } 222 | 223 | static uint8_t desta[1024]; 224 | static uint8_t destb[1024]; 225 | static uint8_t destc[1024]; 226 | for(int len = 0 ; !ret && len < 1020; len++) { 227 | for(int dst_off = 0; !ret && dst_off < 4; dst_off++) { 228 | memset_func ms; 229 | for(int v = 0; v < 3; v++) { 230 | switch (v) { 231 | case 0: 232 | ms = memset_slow; 233 | break; 234 | case 1: 235 | ms = memset_general; 236 | break; 237 | default: 238 | if (dst_off) { 239 | continue; 240 | } 241 | ms = memset_4; 242 | } 243 | memset(desta, 0, len); 244 | memset(destb, 0, len); 245 | 
int c = (1u + xrand()) & 0xfeu; 246 | void *a = memset(desta + dst_off, c, len); 247 | void *b = ms(destb + dst_off, c, len); 248 | b += desta - destb; 249 | uint x = memcmp(desta, destb, 1024); 250 | if (a != b || x) { 251 | printf("Failed v %d +%d len = %d (%p/%p/%d)\n", v, dst_off, len, a, b, x); 252 | ms(destb + dst_off, c, len); // for debugging 253 | ret = 1; 254 | } 255 | } 256 | } 257 | } 258 | *(volatile unsigned int *)0xe000e010=5; // enable SYSTICK at core clock 259 | 260 | for(int len = 0 ; !ret && len < 128; len++) 261 | { 262 | for(int dst_off = 0; !ret && dst_off < 4; dst_off++) 263 | { 264 | int c = (1 + xrand()) & 0xfeu; 265 | uint32_t ta, tb, tc; 266 | void *a = tictoc_memset_slow(desta + dst_off, c, len, &ta); 267 | void *b = tictoc_memset_general(destb + dst_off, c, len, &tb); 268 | __unused void *_c = tictoc_memset(destc + dst_off, c, len, &tc); 269 | b += desta - destb; 270 | uint x = memcmp(desta, destb, 1024); 271 | if (a != b || x) 272 | { 273 | printf("Failed +%d len = %d (%p/%p/%d)\n", dst_off, len, a, b, x); 274 | ret = 1; 275 | } else { 276 | printf("+%d len = %d\t%d\t%d\t%d\n", dst_off, len, (int)ta, (int)tb, (int)tc); 277 | } 278 | } 279 | } 280 | return ret; 281 | } 282 | 283 | int main() { 284 | setup_default_uart(); 285 | ASSERT(!check_memcpy()); 286 | ASSERT(!check_memset()); 287 | printf("OK\n"); 288 | return 0; 289 | } -------------------------------------------------------------------------------- /test/tc_rom_double.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "pico/stdlib.h" 5 | #include "pico/bootrom.h" 6 | 7 | // NOTE THIS IS JUST A SMOKE TEST OF ALL FLOAT FUNCtiONS, NOT AN EXHAUSTIVE CORRECTNESS TEST 8 | 9 | #define ASSERT(x) if (!(x)) { panic("ASSERT: %s l %d: " #x "\n" , __FILE__, __LINE__); } 10 | 11 | typedef union { 12 | double d; 13 | uint64_t l; 14 | } double_value; 15 | 16 | double_value least_neg_norm = { .l = 
0x8010000000000000ll }; 17 | double_value most_neg_denorm = { .l = 0x800fffffffffffffll }; 18 | double_value random_neg_denorm = { .l = 0x800123456789abcdll }; 19 | double_value least_neg_denorm = { .l = 0x8000000000000001ll }; 20 | double_value least_neg_denorm2 = { .l = 0x8000000000000002ll }; 21 | double_value minus_zero = { .l = 0x8000000000000000ll }; 22 | double_value zero = { .l = 0x0000000000000000ll }; 23 | double_value least_pos_denorm = { .l = 0x0000000000000001ll }; 24 | double_value least_pos_denorm2 = { .l = 0x0000000000000002ll }; 25 | double_value random_pos_denorm = { .l = 0x000123456789abcdll }; 26 | double_value most_pos_denorm = { .l = 0x000fffffffffffffll }; 27 | double_value least_pos_norm = { .l = 0x0010000000000000ll }; 28 | 29 | typedef int (*i3func)(int, int); 30 | 31 | struct mufp_funcs { 32 | double (*mufp_dadd)(double, double); 33 | double (*mufp_dsub)(double, double); 34 | double (*mufp_dmul)(double, double); 35 | double (*mufp_ddiv)(double, double); 36 | int (*mufp_dcmp_fast)(double, double); 37 | void (*mufp_dcmp_fast_flags)(double, double); 38 | double (*mufp_dsqrt)(double); 39 | int (*mufp_double2int)(double); 40 | int (*mufp_double2fix)(double, int); 41 | uint (*mufp_double2uint)(double); 42 | uint (*mufp_double2ufix)(double, int); 43 | double (*mufp_int2double)(int); 44 | double (*mufp_fix2double)(int, int); 45 | double (*mufp_uint2double)(uint); 46 | double (*mufp_ufix2double)(uint, int); 47 | double (*mufp_dcos)(double); 48 | double (*mufp_dsin)(double); 49 | double (*mufp_dtan)(double); 50 | double (*v3_mufp_dsincos)(double); 51 | double (*mufp_dexp)(double); 52 | double (*mufp_dln)(double); 53 | 54 | // these are in rom version 2 55 | int (*mufp_dcmp)(double, double); 56 | double (*mufp_datan2)(double, double); 57 | double (*mufp_int642double)(int64_t); 58 | double (*mufp_fix642double)(int64_t, int); 59 | double (*mufp_uint642double)(int64_t); 60 | double (*mufp_ufix642double)(int64_t, int); 61 | int64_t 
(*mufp_double2int64)(double); 62 | int64_t (*mufp_double2fix64)(double, int); 63 | int64_t (*mufp_double2uint64)(double); 64 | int64_t (*mufp_double2ufix64)(double, int); 65 | 66 | float (*mufp_double2float)(double); 67 | } *mufp_funcs; 68 | 69 | double __noinline dadd(double a, double b) { 70 | return a + b; 71 | } 72 | 73 | double __noinline dsub(double a, double b) { 74 | return a - b; 75 | } 76 | 77 | double __noinline dmul(double a, double b) { 78 | return a * b; 79 | } 80 | 81 | double __noinline ddiv(double a, double b) { 82 | return a / b; 83 | } 84 | 85 | int __noinline dcmp_fast(double a, double b) { 86 | return a < b ? - 1 : (a > b ? 1 : 0); 87 | } 88 | 89 | static double flush(double x) { 90 | double_value val = { .d = x }; 91 | if (val.l >= zero.l && val.l <= most_pos_denorm.l) x = 0; 92 | if (val.l >= minus_zero.l && val.l <= most_neg_denorm.l) x = 0; 93 | return x; 94 | } 95 | 96 | int __noinline dcmp(double a, double b) { 97 | return dcmp_fast(flush(a), flush(b)); 98 | } 99 | 100 | double __noinline dsqrt(double a) { 101 | return sqrtf(a); 102 | } 103 | 104 | int __noinline double2int(double a) { 105 | return (int)a; 106 | } 107 | 108 | int64_t __noinline double2int64(double a) { 109 | return (int64_t)a; 110 | } 111 | 112 | int __noinline double2fix(double a, int b) { 113 | return (int)(a * pow(2.0, b)); 114 | } 115 | 116 | int64_t __noinline double2fix64(double a, int b) { 117 | return (int64_t)(a * powf(2.0, b)); 118 | } 119 | 120 | uint __noinline double2uint(double a) { 121 | // we do this which seems more useful... a wrapper for casting can choose to call double2int instead and cast that as uint if it wants 122 | return a < 0 ? 0 : (uint) a; 123 | } 124 | 125 | uint64_t __noinline double2uint64(double a) { 126 | // we do this which seems more useful... a wrapper for casting can choose to call double2int instead and cast that as uint if it wants 127 | return a < 0 ? 
0 : (uint64_t) a; 128 | } 129 | 130 | uint __noinline double2ufix(double a, int b) { 131 | if (a < 0) return 0; 132 | return (uint)(a * pow(2.0, b)); 133 | } 134 | 135 | uint64_t __noinline double2ufix64(double a, int b) { 136 | if (a < 0) return 0; 137 | return (uint64_t)(a * powf(2.0, b)); 138 | } 139 | 140 | double int2double(int a) { 141 | return (double)a; 142 | } 143 | 144 | double int642double(int64_t a) { 145 | return (double)a; 146 | } 147 | 148 | double __noinline fix2double(int a, int b) { 149 | return ((double)a) / pow(2.0, b); 150 | } 151 | 152 | double __noinline fix642double(int64_t a, int b) { 153 | return ((double)a) / powf(2.0, b); 154 | } 155 | 156 | double uint2double(uint a) { 157 | return (double)a; 158 | } 159 | 160 | double uint642double(uint64_t a) { 161 | return (double)a; 162 | } 163 | 164 | double ufix2double(uint a, int b) { 165 | return ((double)a) / pow(2.0, b); 166 | } 167 | 168 | double ufix642double(uint64_t a, int b) { 169 | return ((double)a) / powf(2.0, b); 170 | } 171 | 172 | float double2float(double a) { 173 | return (float)a; 174 | } 175 | 176 | double __noinline dcos(double a) { 177 | return cos(a); 178 | } 179 | 180 | double __noinline dsin(double a) { 181 | return sin(a); 182 | } 183 | 184 | double __noinline dtan(double a) { 185 | return tan(a); 186 | } 187 | 188 | double __noinline datan2(double a, double b) { 189 | return atan2(a, b); 190 | } 191 | 192 | double __noinline dexp(double a) { 193 | return exp(a); 194 | } 195 | 196 | double __noinline dln(double a) { 197 | return log(a); 198 | } 199 | 200 | // yuk our ee_printf crashses on infinites and doubles 201 | #define safe_for_print(x) (float)((x) != (x) ? -12301.0 : ((x) == infinity() ? -12302.0 : ((x) == -infinity() ? 
-12303.0 : (x)))) 202 | // want typeof, but don't want to change build to use new C version at this point 203 | #define check_double(a, b) ({if (!(a == b || abs((a)-(b)) < 1e-11f)) printf("%f != %f %s\n", safe_for_print(a), safe_for_print(b), __STRING((b))); ASSERT(a == b || abs((a)-(b)) < 1e-11f); }) 204 | #define check_int(a, b) ({if ((a)!=(b)) printf("%d != %d %s\n", a, b, __STRING((b))); ASSERT((a) == (b)); }) 205 | #define check_uint(a, b) ({if ((a)!=(b)) printf("%u != %u %s\n", a, b, __STRING((b))); ASSERT((a) == (b)); }) 206 | #define check_int64(a, b) ({if ((a)!=(b)) printf("%08x%08x != %08x%08x %s\n", (int)(a>>32), (int)a, (int)(b>>32), (int)b, __STRING((b))); ASSERT((a) == (b)); }) 207 | #define check_uint64(a, b) ({if ((a)!=(b)) printf("%08x%08x != %08x%08x %s\n", (int)(a>>32), (int)a, (int)(b>>32), (int)b, __STRING((b))); ASSERT((a) == (b)); }) 208 | 209 | #define check_double_fn1(fn, a) check_double(fn(a), mufp_funcs->mufp_##fn(a)) 210 | #define check_double_fn2(fn, a, b) check_double(fn(a, b), mufp_funcs->mufp_##fn(a, b)) 211 | #define check_int_fn1(fn, a) check_int(fn(a), mufp_funcs->mufp_##fn(a)) 212 | #define check_int64_fn1(fn, a) check_int64(fn(a), mufp_funcs->mufp_##fn(a)) 213 | #define check_int_fn2(fn, a, b) check_int(fn(a, b), mufp_funcs->mufp_##fn(a, b)) 214 | #define check_int64_fn2(fn, a, b) check_int64(fn(a, b), mufp_funcs->mufp_##fn(a, b)) 215 | #define check_uint_fn1(fn, a) check_uint(fn(a), mufp_funcs->mufp_##fn(a)) 216 | #define check_uint64_fn1(fn, a) check_uint64(fn(a), mufp_funcs->mufp_##fn(a)) 217 | #define check_uint_fn2(fn, a, b) check_uint(fn(a, b), mufp_funcs->mufp_##fn(a, b)) 218 | #define check_uint64_fn2(fn, a, b) check_uint64(fn(a, b), mufp_funcs->mufp_##fn(a, b)) 219 | 220 | int __attribute__((naked)) dcmp_from_dcmp_flags(double a, double b, int (*dmcp_flags)(double, double)) { 221 | asm( 222 | "push {r4, r5, lr}\n" 223 | "mov r4, #1\n" 224 | "ldr r5, [sp, #0xc]\n" // dcmp_flags param 225 | "blx r5\n" 226 | "bge 1f\n" 227 
| "neg r4, r4\n" 228 | "1:\n" 229 | "bne 1f\n" 230 | "sub r4, r4\n" 231 | "1:\n" 232 | "mov r0, r4\n" 233 | "pop {r4, r5, pc}\n" 234 | ); 235 | } 236 | 237 | double __attribute__((naked)) call_dsincos(double v, double *cout, double (*func)(double)) { 238 | asm( 239 | "push {r2, r4, lr}\n" 240 | "blx r3\n" 241 | "pop {r4}\n" 242 | "stmia r4!, {r2, r3}\n" 243 | "pop {r4, pc}\n" 244 | ); 245 | } 246 | 247 | #define check_dcmp_flags(a,b) check_int(dcmp(a, b), dcmp_from_dcmp_flags(a, b, (void (*)(double,double))mufp_funcs->mufp_dcmp)) // dcmp is dcmp_flags now 248 | #define check_dcmp_fast_flags(a,b) check_int(dcmp_fast(a, b), dcmp_from_dcmp_flags(a, b, mufp_funcs->mufp_dcmp_fast_flags)) 249 | 250 | int main() 251 | { 252 | setup_default_uart(); 253 | int rom_version = *(uint8_t*)0x13; 254 | printf("ROM VERSION %d\n", rom_version); 255 | if (rom_version == 1) { 256 | printf("ROM VERSION 1 HAS NO DOUBLE, SKIPPING\n"); 257 | exit(0); 258 | } 259 | 260 | srand(0xf005ba11); 261 | mufp_funcs = (struct mufp_funcs *)rom_data_lookup(rom_table_code('S','D')); 262 | ASSERT(mufp_funcs); 263 | 264 | uint8_t *func_count = (uint8_t *)rom_data_lookup(rom_table_code('F','Z')); 265 | assert(func_count); 266 | assert(*func_count == sizeof(struct mufp_funcs) / 4); 267 | for(int i=0; i 1) { 287 | // todo check denormals 288 | check_int_fn2(dcmp, -3.0, 7.3); 289 | check_int_fn2(dcmp, 3.0, -7.3); 290 | check_int_fn2(dcmp, 3.0, 3.0); 291 | check_int_fn2(dcmp, 3.0, -infinity()); 292 | check_int_fn2(dcmp, 3.0, infinity()); 293 | 294 | check_int_fn2(dcmp, least_neg_denorm.d, most_neg_denorm.d); 295 | check_int_fn2(dcmp, most_neg_denorm.d, least_neg_denorm.d); 296 | check_int_fn2(dcmp, least_neg_denorm.d, least_neg_denorm.d); 297 | check_int_fn2(dcmp, least_neg_norm.d, least_neg_denorm.d); 298 | check_int_fn2(dcmp, least_neg_denorm.d, least_neg_denorm2.d); 299 | check_int_fn2(dcmp, least_neg_denorm.d, least_pos_denorm.d); 300 | check_int_fn2(dcmp, least_pos_denorm.d, most_pos_denorm.d); 301 | 
check_int_fn2(dcmp, most_pos_denorm.d, least_pos_denorm.d); 302 | check_int_fn2(dcmp, least_pos_denorm.d, least_pos_denorm.d); 303 | check_int_fn2(dcmp, least_pos_denorm.d, least_pos_denorm.d); 304 | check_int_fn2(dcmp, least_pos_norm.d, least_pos_denorm.d); 305 | check_int_fn2(dcmp, least_pos_denorm.d, least_pos_denorm2.d); 306 | check_int_fn2(dcmp, least_pos_denorm.d, least_neg_denorm.d); 307 | } 308 | 309 | check_int_fn2(dcmp_fast, -3.0, 7.3); 310 | check_int_fn2(dcmp_fast, 3.0, -7.3); 311 | check_int_fn2(dcmp_fast, 3.0, 3.0); 312 | check_int_fn2(dcmp_fast, 3.0, -infinity()); 313 | check_int_fn2(dcmp_fast, 3.0, infinity()); 314 | 315 | check_int_fn2(dcmp_fast, least_neg_denorm.d, most_neg_denorm.d); 316 | check_int_fn2(dcmp_fast, most_neg_denorm.d, least_neg_denorm.d); 317 | check_int_fn2(dcmp_fast, least_neg_denorm.d, least_neg_denorm.d); 318 | check_int_fn2(dcmp_fast, least_neg_norm.d, least_neg_denorm.d); 319 | check_int_fn2(dcmp_fast, least_neg_denorm.d, least_neg_denorm2.d); 320 | check_int_fn2(dcmp_fast, least_neg_denorm.d, least_pos_denorm.d); 321 | check_int_fn2(dcmp_fast, least_pos_denorm.d, most_pos_denorm.d); 322 | check_int_fn2(dcmp_fast, most_pos_denorm.d, least_pos_denorm.d); 323 | check_int_fn2(dcmp_fast, least_pos_denorm.d, least_pos_denorm.d); 324 | check_int_fn2(dcmp_fast, least_pos_denorm.d, least_pos_denorm.d); 325 | check_int_fn2(dcmp_fast, least_pos_norm.d, least_pos_denorm.d); 326 | check_int_fn2(dcmp_fast, least_pos_denorm.d, least_pos_denorm2.d); 327 | check_int_fn2(dcmp_fast, least_pos_denorm.d, least_neg_denorm.d); 328 | 329 | 330 | if (rom_version > 1) { 331 | // todo check denormals 332 | check_dcmp_flags(-3.0, 7.3); 333 | check_dcmp_flags(3.0, -7.3); 334 | check_dcmp_flags(3.0, 3.0); 335 | check_dcmp_flags(3.0, -infinity()); 336 | check_dcmp_flags(3.0, infinity()); 337 | 338 | check_dcmp_flags( least_neg_denorm.d, most_neg_denorm.d); 339 | check_dcmp_flags( most_neg_denorm.d, least_neg_denorm.d); 340 | check_dcmp_flags( 
least_neg_denorm.d, least_neg_denorm.d); 341 | check_dcmp_flags( least_neg_norm.d, least_neg_denorm.d); 342 | check_dcmp_flags( least_neg_denorm.d, least_neg_denorm2.d); 343 | check_dcmp_flags( least_neg_denorm.d, least_pos_denorm.d); 344 | check_dcmp_flags( least_pos_denorm.d, most_pos_denorm.d); 345 | check_dcmp_flags( most_pos_denorm.d, least_pos_denorm.d); 346 | check_dcmp_flags( least_pos_denorm.d, least_pos_denorm.d); 347 | check_dcmp_flags( least_pos_denorm.d, least_pos_denorm.d); 348 | check_dcmp_flags( least_pos_norm.d, least_pos_denorm.d); 349 | check_dcmp_flags( least_pos_denorm.d, least_pos_denorm2.d); 350 | check_dcmp_flags( least_pos_denorm.d, least_neg_denorm.d); 351 | } 352 | 353 | check_dcmp_fast_flags(-3.0, 7.3); 354 | check_dcmp_fast_flags(3.0, -7.3); 355 | check_dcmp_fast_flags(3.0, 3.0); 356 | check_dcmp_fast_flags(3.0, -infinity()); 357 | check_dcmp_fast_flags(3.0, infinity()); 358 | 359 | check_dcmp_fast_flags( least_neg_denorm.d, most_neg_denorm.d); 360 | check_dcmp_fast_flags( most_neg_denorm.d, least_neg_denorm.d); 361 | check_dcmp_fast_flags( least_neg_denorm.d, least_neg_denorm.d); 362 | check_dcmp_fast_flags( least_neg_norm.d, least_neg_denorm.d); 363 | check_dcmp_fast_flags( least_neg_denorm.d, least_neg_denorm2.d); 364 | check_dcmp_fast_flags( least_neg_denorm.d, least_pos_denorm.d); 365 | check_dcmp_fast_flags( least_pos_denorm.d, most_pos_denorm.d); 366 | check_dcmp_fast_flags( most_pos_denorm.d, least_pos_denorm.d); 367 | check_dcmp_fast_flags( least_pos_denorm.d, least_pos_denorm.d); 368 | check_dcmp_fast_flags( least_pos_denorm.d, least_pos_denorm.d); 369 | check_dcmp_fast_flags( least_pos_norm.d, least_pos_denorm.d); 370 | check_dcmp_fast_flags( least_pos_denorm.d, least_pos_denorm2.d); 371 | check_dcmp_fast_flags( least_pos_denorm.d, least_neg_denorm.d); 372 | 373 | check_double_fn1(dsqrt, 3.0); 374 | 375 | // we are returning INFINITE not NAN as we don't support NANs 376 | // check_double_fn1(fsqrt, -3.0); 377 | // todo right 
now qsqrt and fsqrt return opposite signed infinity 378 | #if 0 379 | ASSERT(infinity() == mufp_funcs->mufp_dsqrt(-3.0)); 380 | #else 381 | ASSERT(-infinity() == mufp_funcs->mufp_dsqrt(-3.0)); 382 | #endif 383 | 384 | check_int_fn1(double2int, 3.0); 385 | check_int_fn1(double2int, 123456000000.0); 386 | check_int_fn1(double2int, -3.0); 387 | check_int_fn1(double2int, -123456000000.0); 388 | check_int_fn1(double2int, infinity()); 389 | check_int_fn1(double2int, -infinity()); 390 | 391 | check_int_fn2(double2fix, 3.0, 3); 392 | check_int_fn2(double2fix, 31.0, -3); 393 | check_int_fn2(double2fix, -3.0, 3); 394 | // todo JURY IS OUT ON THIS ONE 395 | //check_int_fn2(double2fix, -31.0, -3); 396 | 397 | check_uint_fn1(double2uint, 3.0); 398 | check_uint_fn1(double2uint, 123456000000.0); 399 | check_uint_fn1(double2uint, -3.0); 400 | check_uint_fn1(double2uint, -123456000000.0); 401 | 402 | check_uint_fn2(double2ufix, 3.0, 3); 403 | check_uint_fn2(double2ufix, 3.0, -3); 404 | 405 | check_double_fn1(int2double, 3); 406 | check_double_fn1(int2double, INT32_MAX); 407 | check_double_fn1(int2double, INT32_MIN); 408 | check_double_fn1(int2double, -3); 409 | 410 | check_double_fn2(fix2double, 3, 3); 411 | check_double_fn2(fix2double, 3, -3); 412 | check_double_fn2(fix2double, -3, 3); 413 | check_double_fn2(fix2double, -3, -3); 414 | 415 | check_double_fn1(uint2double, 3); 416 | check_double_fn1(uint2double, UINT32_MAX); 417 | 418 | // double (*mufp_ufix2double)(uint, int); 419 | 420 | check_double_fn1(dcos, 0.0); 421 | check_double_fn1(dcos, 2.7); 422 | check_double_fn1(dcos, -32.7); 423 | 424 | check_double_fn1(dsin, 0.0); 425 | check_double_fn1(dsin, 2.7); 426 | check_double_fn1(dsin, -32.7); 427 | 428 | check_double_fn1(dtan, 0.0); 429 | check_double_fn1(dtan, 2.7); 430 | check_double_fn1(dtan, -32.7); 431 | 432 | if (rom_version > 1) { 433 | check_double_fn2(datan2, 3.0, 4.0); 434 | check_double_fn2(datan2, -3.0, 4.0); 435 | check_double_fn2(datan2, 4.0, -.31); 436 | 
check_double_fn2(datan2, -3.0, -.17); 437 | } 438 | 439 | check_double_fn1(dexp, 0.0); 440 | check_double_fn1(dexp, 2.7); 441 | check_double_fn1(dexp, -32.7); 442 | 443 | check_double_fn1(dln, 0.3); 444 | check_double_fn1(dln, 1.0); 445 | check_double_fn1(dln, 2.7); 446 | 447 | // we are returning -INFINITE as we don't support NANs 448 | // check_double_fn1(fln, -32.7); 449 | ASSERT(-INFINITY == mufp_funcs->mufp_dln(-32.7)); 450 | 451 | check_int64_fn1(double2int64, 3.0); 452 | check_int64_fn1(double2int64, 123456000000.0); 453 | check_int64_fn1(double2int64, 12345678912345.0); 454 | check_int64_fn1(double2int64, -3.0); 455 | check_int64_fn1(double2int64, -123456000000.0); 456 | check_int64_fn1(double2int64, -12345678912345.0); 457 | 458 | // seems like gcc is wrong on this one 459 | // check_int64_fn1(double2int64, INFINITY); 460 | // so 461 | ASSERT(INT64_MAX == mufp_funcs->mufp_double2int64(INFINITY)); 462 | 463 | // seems like gcc is wrong on this one 464 | // check_int64_fn1(double2int64, -INFINITY); 465 | // so 466 | ASSERT( INT64_MIN == mufp_funcs->mufp_double2int64(-INFINITY)); 467 | 468 | 469 | check_int64_fn2(double2fix64, 3.0, 3); 470 | check_int64_fn2(double2fix64, 31.0, -3); 471 | check_int64_fn2(double2fix64, -3.0, 3); 472 | // todo JURY IS OUT ON THIS ONE 473 | //check_int64_fn2(double2fix64, -31.0, -3); 474 | 475 | check_uint64_fn1(double2uint64, 3.0); 476 | check_uint64_fn1(double2uint64, 123456000000.0); 477 | check_uint64_fn1(double2uint64, 12345678912345.0); 478 | check_uint64_fn1(double2uint64, -3.0); 479 | check_uint64_fn1(double2uint64, -123456000000.0); 480 | check_uint64_fn1(double2uint64, -12345678912345.0); 481 | 482 | check_uint64_fn2(double2ufix64, 3.0, 3); 483 | check_uint64_fn2(double2ufix64, 3.0, 43); 484 | check_uint64_fn2(double2ufix64, 3.0, -3); 485 | check_uint64_fn2(double2ufix64, 3.0, -43); 486 | 487 | #define LARGE 0x1234567800000000ll 488 | check_double_fn1(int642double, 3); 489 | check_double_fn1(int642double, LARGE); 490 | 
check_double_fn1(int642double, INT32_MAX); 491 | check_double_fn1(int642double, INT64_MAX); 492 | check_double_fn1(int642double, INT32_MIN); 493 | check_double_fn1(int642double, INT64_MIN); 494 | check_double_fn1(int642double, -3); 495 | check_double_fn1(int642double, -LARGE); 496 | 497 | check_double_fn2(fix642double, 3, 3); 498 | check_double_fn2(fix642double, 3, -3); 499 | check_double_fn2(fix642double, -3, 3); 500 | check_double_fn2(fix642double, -3, -3); 501 | 502 | check_double_fn2(fix642double, LARGE, 3); 503 | check_double_fn2(fix642double, LARGE, -3); 504 | check_double_fn2(fix642double, -LARGE, 3); 505 | check_double_fn2(fix642double, -LARGE, -3); 506 | 507 | check_double_fn1(uint642double, 3); 508 | check_double_fn1(uint642double, UINT32_MAX); 509 | check_double_fn1(uint642double, UINT64_MAX); 510 | check_double_fn1(uint642double, LARGE); 511 | 512 | check_double_fn1(double2float, 3.0f); 513 | check_double_fn1(double2float, 3.0e12f); 514 | check_double_fn1(double2float, -3.0e12f); 515 | check_double_fn1(double2float, -3.13159275412321412565856845745); 516 | check_double_fn1(double2float, 3.13159275412321412565856845745); 517 | check_double_fn1(double2float, -INFINITY); 518 | check_double_fn1(double2float, INFINITY); 519 | check_double_fn1(double2float, most_pos_denorm.d); 520 | check_double_fn1(double2float, least_pos_denorm.d); 521 | check_double_fn1(double2float, least_pos_norm.d); 522 | check_double_fn1(double2float, least_neg_denorm.d); 523 | check_double_fn1(double2float, least_neg_norm.d); 524 | check_double_fn1(double2float, most_neg_denorm.d); 525 | check_double_fn1(double2float, 0.f); 526 | check_double_fn1(double2float, -0.f); 527 | 528 | check_double_fn1(double2float, 3.0e-58); 529 | check_double_fn1(double2float, 3.0e58); 530 | 531 | check_double_fn1(double2float, 3.0e68); 532 | check_double_fn1(double2float, -3.0e68); 533 | 534 | if (rom_version >= 3) { 535 | for(double a = -0.3f; a<7.f; a += 0.137) { 536 | double s = 
mufp_funcs->mufp_dsin(a); 537 | double c = mufp_funcs->mufp_dcos(a); 538 | double co=0; 539 | double so = call_dsincos(a, &co, mufp_funcs->v3_mufp_dsincos); 540 | ASSERT(so == s && co == c); 541 | } 542 | } 543 | printf("DOUBLE OK\n"); 544 | return 0; 545 | } 546 | -------------------------------------------------------------------------------- /test/tc_rom_float.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "pico/stdlib.h" 5 | #include "pico/bootrom.h" 6 | 7 | // NOTE THIS IS JUST A SMOKE TEST OF ALL FLOAT FUNCtiONS, NOT AN EXHAUSTIVE CORRECTNESS TEST 8 | 9 | #define ASSERT(x) if (!(x)) { panic("ASSERT: %s l %d: " #x "\n" , __FILE__, __LINE__); } 10 | 11 | typedef union { 12 | float f; 13 | uint32_t i; 14 | } float_value; 15 | 16 | float_value least_neg_norm = { .i = 0x80800000 }; 17 | float_value most_neg_denorm = { .i = 0x807fffff }; 18 | float_value random_neg_denorm = { .i = 0x80123456 }; 19 | float_value least_neg_denorm = { .i = 0x80000001 }; 20 | float_value minus_zero = { .i = 0x80000000 }; 21 | float_value zero = { .i = 0x00000000 }; 22 | float_value least_pos_denorm = { .i = 0x00000001 }; 23 | float_value random_pos_denorm = { .i = 0x00123456 }; 24 | float_value most_pos_denorm = { .i = 0x007fffff }; 25 | float_value least_pos_norm = { .i = 0x00800000 }; 26 | 27 | typedef int (*i3func)(int, int); 28 | 29 | struct mufp_funcs { 30 | float (*mufp_fadd)(float, float); 31 | float (*mufp_fsub)(float, float); 32 | float (*mufp_fmul)(float, float); 33 | float (*mufp_fdiv)(float, float); 34 | int (*mufp_fcmp_fast)(float, float); 35 | void (*mufp_fcmp_fast_flags)(float, float); 36 | float (*mufp_fsqrt)(float); 37 | int (*mufp_float2int)(float); 38 | int (*mufp_float2fix)(float, int); 39 | uint (*mufp_float2uint)(float); 40 | uint (*mufp_float2ufix)(float, int); 41 | float (*mufp_int2float)(int); 42 | float (*mufp_fix2float)(int, int); 43 | float (*mufp_uint2float)(uint); 44 | 
float (*mufp_ufix2float)(uint, int); 45 | float (*mufp_fcos)(float); 46 | float (*mufp_fsin)(float); 47 | float (*mufp_ftan)(float); 48 | float (*v3_mufp_fsincos)(float); 49 | float (*mufp_fexp)(float); 50 | float (*mufp_fln)(float); 51 | 52 | // these are in rom version 2 53 | int (*mufp_fcmp)(float, float); 54 | float (*mufp_fatan2)(float, float); 55 | float (*mufp_int642float)(int64_t); 56 | float (*mufp_fix642float)(int64_t, int); 57 | float (*mufp_uint642float)(int64_t); 58 | float (*mufp_ufix642float)(int64_t, int); 59 | int64_t (*mufp_float2int64)(float); 60 | int64_t (*mufp_float2fix64)(float, int); 61 | int64_t (*mufp_float2uint64)(float); 62 | int64_t (*mufp_float2ufix64)(float, int); 63 | double (*mufp_float2double)(float); 64 | 65 | } *mufp_funcs; 66 | 67 | float __noinline fadd(float a, float b) { 68 | return a + b; 69 | } 70 | 71 | float __noinline fsub(float a, float b) { 72 | return a - b; 73 | } 74 | 75 | float __noinline fmul(float a, float b) { 76 | return a * b; 77 | } 78 | 79 | float __noinline fdiv(float a, float b) { 80 | return a / b; 81 | } 82 | 83 | float flush(float x) { 84 | float_value val = { .f = x }; 85 | if (val.i >= zero.i && val.i <= most_pos_denorm.i) x = 0; 86 | if (val.i >= minus_zero.i && val.i <= most_neg_denorm.i) x = 0; 87 | return x; 88 | } 89 | 90 | int __noinline fcmp_fast(float a, float b) { 91 | return a < b ? - 1 : (a > b ? 
1 : 0); 92 | } 93 | 94 | int __noinline fcmp(float a, float b) { 95 | return fcmp_fast(flush(a), flush(b)); 96 | } 97 | 98 | float __noinline fsqrt(float a) { 99 | return sqrtf(a); 100 | } 101 | 102 | int __noinline float2int(float a) { 103 | return (int)a; 104 | } 105 | 106 | int64_t __noinline float2int64(float a) { 107 | return (int64_t)a; 108 | } 109 | 110 | int __noinline float2fix(float a, int b) { 111 | return (int)(a * powf(2.f, b)); 112 | } 113 | 114 | int64_t __noinline float2fix64(float a, int b) { 115 | return (int64_t)(a * powf(2.f, b)); 116 | } 117 | 118 | uint __noinline float2uint(float a) { 119 | // we do this which seems more useful... a wrapper for casting can choose to call float2int instead and cast that as uint if it wants 120 | return a < 0 ? 0 : (uint) a; 121 | } 122 | 123 | uint64_t __noinline float2uint64(float a) { 124 | // we do this which seems more useful... a wrapper for casting can choose to call float2int instead and cast that as uint if it wants 125 | return a < 0 ? 
0 : (uint64_t) a; 126 | } 127 | 128 | 129 | uint __noinline float2ufix(float a, int b) { 130 | if (a < 0) return 0; 131 | return (uint)(a * powf(2.f, b)); 132 | } 133 | 134 | uint64_t __noinline float2ufix64(float a, int b) { 135 | if (a < 0) return 0; 136 | return (uint64_t)(a * powf(2.f, b)); 137 | } 138 | 139 | float int2float(int a) { 140 | return (float)a; 141 | } 142 | 143 | float int642float(int64_t a) { 144 | return (float)a; 145 | } 146 | 147 | float __noinline fix2float(int a, int b) { 148 | return ((float)a) / powf(2.f, b); 149 | } 150 | 151 | float __noinline fix642float(int64_t a, int b) { 152 | return ((float)a) / powf(2.f, b); 153 | } 154 | 155 | float uint2float(uint a) { 156 | return (float)a; 157 | } 158 | 159 | float uint642float(uint64_t a) { 160 | return (float)a; 161 | } 162 | 163 | float ufix2float(uint a, int b) { 164 | return ((float)a) / powf(2.f, b); 165 | } 166 | 167 | float ufix642float(uint64_t a, int b) { 168 | return ((float)a) / powf(2.f, b); 169 | } 170 | 171 | double float2double(float a) { 172 | return (double)a; 173 | } 174 | 175 | float __noinline fcos(float a) { 176 | return cosf(a); 177 | } 178 | 179 | float __noinline fsin(float a) { 180 | return sinf(a); 181 | } 182 | 183 | float __noinline ftan(float a) { 184 | return tanf(a); 185 | } 186 | 187 | float __noinline fatan2(float a, float b) { 188 | return atan2f(a, b); 189 | } 190 | 191 | float __noinline fexp(float a) { 192 | return expf(a); 193 | } 194 | 195 | float __noinline fln(float a) { 196 | return logf(a); 197 | } 198 | 199 | // yuk our ee_printf crashses on infinites and floats 200 | #define safe_for_print(x) (x) != (x) ? -12301.f : ((x) == INFINITY ? -12302.f : ((x) == -INFINITY ? 
-12303.f : (x))) 201 | // want typeof, but don't want to change build to use new C version at this point 202 | #define check_float(a, b) ({if (!(a == b || fabsf((a)-(b)) < 1e-6f)) printf("%f != %f %s\n", safe_for_print(a), safe_for_print(b), __STRING((b))); ASSERT(a == b || fabsf((a)-(b)) < 1e-6f); }) 203 | #define check_int(a, b) ({if ((a)!=(b)) printf("%d != %d %s\n", a, b, __STRING((b))); ASSERT((a) == (b)); }) 204 | #define check_uint(a, b) ({if ((a)!=(b)) printf("%u != %u %s\n", a, b, __STRING((b))); ASSERT((a) == (b)); }) 205 | #define check_int64(a, b) ({if ((a)!=(b)) printf("%08x%08x != %08x%08x %s\n", (int)(a>>32), (int)a, (int)(b>>32), (int)b, __STRING((b))); ASSERT((a) == (b)); }) 206 | #define check_uint64(a, b) ({if ((a)!=(b)) printf("%08x%08x != %08x%08x %s\n", (int)(a>>32), (int)a, (int)(b>>32), (int)b, __STRING((b))); ASSERT((a) == (b)); }) 207 | 208 | #define check_float_fn1(fn, a) check_float(fn(a), mufp_funcs->mufp_##fn(a)) 209 | #define check_float_fn2(fn, a, b) check_float(fn(a, b), mufp_funcs->mufp_##fn(a, b)) 210 | #define check_int_fn1(fn, a) check_int(fn(a), mufp_funcs->mufp_##fn(a)) 211 | #define check_int64_fn1(fn, a) check_int64(fn(a), mufp_funcs->mufp_##fn(a)) 212 | #define check_int_fn2(fn, a, b) check_int(fn(a, b), mufp_funcs->mufp_##fn(a, b)) 213 | #define check_int64_fn2(fn, a, b) check_int64(fn(a, b), mufp_funcs->mufp_##fn(a, b)) 214 | #define check_uint_fn1(fn, a) check_uint(fn(a), mufp_funcs->mufp_##fn(a)) 215 | #define check_uint64_fn1(fn, a) check_uint64(fn(a), mufp_funcs->mufp_##fn(a)) 216 | #define check_uint_fn2(fn, a, b) check_uint(fn(a, b), mufp_funcs->mufp_##fn(a, b)) 217 | #define check_uint64_fn2(fn, a, b) check_uint64(fn(a, b), mufp_funcs->mufp_##fn(a, b)) 218 | 219 | int __attribute__((naked)) fcmp_from_fcmp_flags(float a, float b, int (*fmcp_flags)(float, float)) { 220 | asm( 221 | "push {r4, lr}\n" 222 | "mov r4, #1\n" 223 | "blx r2\n" 224 | "bge 1f\n" 225 | "neg r4, r4\n" 226 | "1:\n" 227 | "bne 1f\n" 228 | "sub 
r4, r4\n" 229 | "1:\n" 230 | "mov r0, r4\n" 231 | "pop {r4, pc}\n" 232 | ); 233 | } 234 | 235 | float __attribute__((naked)) call_fsincos(float v, float *cout, float (*func)(float)) { 236 | asm( 237 | "push {r1, lr}\n" 238 | "blx r2\n" 239 | "pop {r2}\n" 240 | "str r1, [r2]\n" 241 | "pop {pc}\n" 242 | ); 243 | } 244 | 245 | #define check_fcmp_flags(a,b) check_int(fcmp(a, b), fcmp_from_fcmp_flags(a, b, (void (*)(float, float))mufp_funcs->mufp_fcmp)) // f_cmp is f_cmp_flags now 246 | #define check_fcmp_fast_flags(a,b) check_int(fcmp_fast(a, b), fcmp_from_fcmp_flags(a, b, mufp_funcs->mufp_fcmp_fast_flags)) 247 | 248 | int main() 249 | { 250 | setup_default_uart(); 251 | srand(0xf005ba11); 252 | mufp_funcs = (struct mufp_funcs *)rom_data_lookup(rom_table_code('S','F')); 253 | ASSERT(mufp_funcs); 254 | 255 | int rom_version = *(uint8_t*)0x13; 256 | printf("ROM VERSION %d\n", rom_version); 257 | if (rom_version > 1) { 258 | uint8_t *func_count = (uint8_t *)rom_data_lookup(rom_table_code('F','Z')); 259 | assert(func_count); 260 | assert(*func_count == sizeof(struct mufp_funcs) / 4); 261 | } 262 | int valid_funcs_size = sizeof(struct mufp_funcs); 263 | if (rom_version == 1) { 264 | valid_funcs_size -= 11 * 4; 265 | } 266 | for(int i=0; i 1) { 286 | check_int_fn2(fcmp, -3.0f, 7.3f); 287 | check_int_fn2(fcmp, 3.0f, -7.3f); 288 | check_int_fn2(fcmp, 3.0f, 3.0f); 289 | check_int_fn2(fcmp, 3.0f, -INFINITY); 290 | check_int_fn2(fcmp, 3.0f, INFINITY); 291 | 292 | check_int_fn2(fcmp, least_neg_denorm.f, most_neg_denorm.f); 293 | check_int_fn2(fcmp, most_neg_denorm.f, least_neg_denorm.f); 294 | check_int_fn2(fcmp, least_neg_denorm.f, least_neg_denorm.f); 295 | check_int_fn2(fcmp, least_neg_norm.f, least_neg_denorm.f); 296 | check_int_fn2(fcmp, least_pos_denorm.f, most_pos_denorm.f); 297 | check_int_fn2(fcmp, most_pos_denorm.f, least_pos_denorm.f); 298 | check_int_fn2(fcmp, least_pos_denorm.f, least_pos_denorm.f); 299 | check_int_fn2(fcmp, least_pos_denorm.f, least_pos_denorm.f); 
300 | check_int_fn2(fcmp, least_pos_norm.f, least_pos_denorm.f); 301 | 302 | } 303 | 304 | check_int_fn2(fcmp_fast, -3.0f, 7.3f); 305 | check_int_fn2(fcmp_fast, 3.0f, -7.3f); 306 | check_int_fn2(fcmp_fast, 3.0f, 3.0f); 307 | check_int_fn2(fcmp_fast, 3.0f, -INFINITY); 308 | check_int_fn2(fcmp_fast, 3.0f, INFINITY); 309 | check_int_fn2(fcmp_fast, minus_zero.f, zero.f); 310 | 311 | check_int_fn2(fcmp_fast, least_neg_denorm.f, most_neg_denorm.f); 312 | check_int_fn2(fcmp_fast, most_neg_denorm.f, least_neg_denorm.f); 313 | check_int_fn2(fcmp_fast, least_neg_denorm.f, least_neg_denorm.f); 314 | check_int_fn2(fcmp_fast, least_neg_norm.f, least_neg_denorm.f); 315 | check_int_fn2(fcmp_fast, least_pos_denorm.f, most_pos_denorm.f); 316 | check_int_fn2(fcmp_fast, most_pos_denorm.f, least_pos_denorm.f); 317 | check_int_fn2(fcmp_fast, least_pos_denorm.f, least_pos_denorm.f); 318 | check_int_fn2(fcmp_fast, least_pos_denorm.f, least_pos_denorm.f); 319 | check_int_fn2(fcmp_fast, least_pos_norm.f, least_pos_denorm.f); 320 | 321 | if (rom_version > 1) { 322 | check_fcmp_flags(-3.0f, 7.3f); 323 | check_fcmp_flags(3.0f, -7.3f); 324 | check_fcmp_flags(3.0f, 3.0f); 325 | check_fcmp_flags(3.0f, -INFINITY); 326 | check_fcmp_flags(3.0f, INFINITY); 327 | 328 | check_fcmp_flags(least_neg_denorm.f, most_neg_denorm.f); 329 | check_fcmp_flags(most_neg_denorm.f, least_neg_denorm.f); 330 | check_fcmp_flags(least_neg_denorm.f, least_neg_denorm.f); 331 | check_fcmp_flags(least_neg_norm.f, least_neg_denorm.f); 332 | check_fcmp_flags(least_pos_denorm.f, most_pos_denorm.f); 333 | check_fcmp_flags(most_pos_denorm.f, least_pos_denorm.f); 334 | check_fcmp_flags(least_pos_denorm.f, least_pos_denorm.f); 335 | check_fcmp_flags(least_pos_denorm.f, least_pos_denorm.f); 336 | check_fcmp_flags(least_pos_norm.f, least_pos_denorm.f); 337 | } 338 | 339 | check_fcmp_fast_flags(-3.0f, 7.3f); 340 | check_fcmp_fast_flags(3.0f, -7.3f); 341 | check_fcmp_fast_flags(3.0f, 3.0f); 342 | check_fcmp_fast_flags(3.0f, 
-INFINITY); 343 | check_fcmp_fast_flags(3.0f, INFINITY); 344 | 345 | check_fcmp_fast_flags(least_neg_denorm.f, most_neg_denorm.f); 346 | check_fcmp_fast_flags(most_neg_denorm.f, least_neg_denorm.f); 347 | check_fcmp_fast_flags(least_neg_denorm.f, least_neg_denorm.f); 348 | check_fcmp_fast_flags(least_neg_norm.f, least_neg_denorm.f); 349 | check_fcmp_fast_flags(least_pos_denorm.f, most_pos_denorm.f); 350 | check_fcmp_fast_flags(most_pos_denorm.f, least_pos_denorm.f); 351 | check_fcmp_fast_flags(least_pos_denorm.f, least_pos_denorm.f); 352 | check_fcmp_fast_flags(least_pos_denorm.f, least_pos_denorm.f); 353 | check_fcmp_fast_flags(least_pos_norm.f, least_pos_denorm.f); 354 | 355 | check_float_fn1(fsqrt, 3.0f); 356 | 357 | // we are returning INFINITE not NAN as we don't support NANs 358 | // check_float_fn1(fsqrt, -3.0f); 359 | if (rom_version == 1) 360 | { 361 | // is buggy in rom version 1 362 | ASSERT(INFINITY == mufp_funcs->mufp_fsqrt(-3.0f)); 363 | } else { 364 | ASSERT(-INFINITY == mufp_funcs->mufp_fsqrt(-3.0f)); 365 | } 366 | 367 | check_int_fn1(float2int, 3.f); 368 | check_int_fn1(float2int, 123456000000.f); 369 | check_int_fn1(float2int, -3.f); 370 | check_int_fn1(float2int, -123456000000.f); 371 | check_int_fn1(float2int, INFINITY); 372 | check_int_fn1(float2int, -INFINITY); 373 | 374 | check_int_fn2(float2fix, 3.f, 3); 375 | check_int_fn2(float2fix, 31.f, -3); 376 | check_int_fn2(float2fix, -3.f, 3); 377 | // todo JURY IS OUT ON THIS ONE 378 | //check_int_fn2(float2fix, -31.f, -3); 379 | 380 | check_uint_fn1(float2uint, 3.f); 381 | check_uint_fn1(float2uint, 123456000000.f); 382 | check_uint_fn1(float2uint, -3.f); 383 | check_uint_fn1(float2uint, -123456000000.f); 384 | 385 | check_uint_fn2(float2ufix, 3.f, 3); 386 | check_uint_fn2(float2ufix, 3.f, -3); 387 | 388 | check_float_fn1(int2float, 3); 389 | check_float_fn1(int2float, INT32_MAX); 390 | check_float_fn1(int2float, INT32_MIN); 391 | check_float_fn1(int2float, -3); 392 | 393 | 
check_float_fn2(fix2float, 3, 3); 394 | check_float_fn2(fix2float, 3, -3); 395 | check_float_fn2(fix2float, -3, 3); 396 | check_float_fn2(fix2float, -3, -3); 397 | 398 | check_float_fn1(uint2float, 3); 399 | check_float_fn1(uint2float, UINT32_MAX); 400 | 401 | // float (*mufp_ufix2float)(uint, int); 402 | 403 | check_float_fn1(fcos, 0.0f); 404 | check_float_fn1(fcos, 2.7f); 405 | check_float_fn1(fcos, -32.7f); 406 | 407 | check_float_fn1(fsin, 0.0f); 408 | check_float_fn1(fsin, 2.7f); 409 | check_float_fn1(fsin, -32.7f); 410 | 411 | check_float_fn1(ftan, 0.0f); 412 | check_float_fn1(ftan, 2.7f); 413 | check_float_fn1(ftan, -32.7f); 414 | 415 | if (rom_version > 1) { 416 | // broken on rom v 1 417 | // todo check broken range 418 | check_float_fn2(fatan2, 3.0f, 4.0f); 419 | check_float_fn2(fatan2, -3.0f, 4.0f); 420 | check_float_fn2(fatan2, 4.0f, -.31f); 421 | check_float_fn2(fatan2, -3.0f, -.17f); 422 | } 423 | 424 | check_float_fn1(fexp, 0.0f); 425 | check_float_fn1(fexp, 2.7f); 426 | check_float_fn1(fexp, -32.7f); 427 | 428 | check_float_fn1(fln, 0.3f); 429 | check_float_fn1(fln, 1.0f); 430 | check_float_fn1(fln, 2.7f); 431 | // we are returning -INFINITE as we don't support NANs 432 | // check_float_fn1(fln, -32.7f); 433 | ASSERT(-INFINITY == mufp_funcs->mufp_fln(-32.7f)); 434 | 435 | if (rom_version > 1) { 436 | check_int64_fn1(float2int64, 3.f); 437 | check_int64_fn1(float2int64, 123456000000.f); 438 | check_int64_fn1(float2int64, 12345678912345.f); 439 | check_int64_fn1(float2int64, -3.f); 440 | check_int64_fn1(float2int64, -123456000000.f); 441 | check_int64_fn1(float2int64, -12345678912345.f); 442 | 443 | // seems like gcc is wrong on this one 444 | // check_int64_fn1(float2int64, INFINITY); 445 | // so 446 | ASSERT(INT64_MAX == mufp_funcs->mufp_float2int64(INFINITY)); 447 | 448 | // seems like gcc is wrong on this one 449 | // check_int64_fn1(float2int64, -INFINITY); 450 | // so 451 | ASSERT( INT64_MIN == mufp_funcs->mufp_float2int64(-INFINITY)); 452 | 453 
| 454 | check_int64_fn2(float2fix64, 3.f, 3); 455 | check_int64_fn2(float2fix64, 31.f, -3); 456 | check_int64_fn2(float2fix64, -3.f, 3); 457 | // todo JURY IS OUT ON THIS ONE 458 | //check_int64_fn2(float2fix64, -31.f, -3); 459 | 460 | check_uint64_fn1(float2uint64, 3.f); 461 | check_uint64_fn1(float2uint64, 123456000000.f); 462 | check_uint64_fn1(float2uint64, 12345678912345.f); 463 | check_uint64_fn1(float2uint64, -3.f); 464 | check_uint64_fn1(float2uint64, -123456000000.f); 465 | check_uint64_fn1(float2uint64, -12345678912345.f); 466 | 467 | check_uint64_fn2(float2ufix64, 3.f, 3); 468 | check_uint64_fn2(float2ufix64, 3.f, 43); 469 | check_uint64_fn2(float2ufix64, 3.f, -3); 470 | check_uint64_fn2(float2ufix64, 3.f, -43); 471 | 472 | #define LARGE 0x1234567800000000ll 473 | check_float_fn1(int642float, 3); 474 | check_float_fn1(int642float, LARGE); 475 | check_float_fn1(int642float, INT32_MAX); 476 | check_float_fn1(int642float, INT64_MAX); 477 | check_float_fn1(int642float, INT32_MIN); 478 | check_float_fn1(int642float, INT64_MIN); 479 | check_float_fn1(int642float, -3); 480 | check_float_fn1(int642float, -LARGE); 481 | 482 | check_float_fn2(fix642float, 3, 3); 483 | check_float_fn2(fix642float, 3, -3); 484 | check_float_fn2(fix642float, -3, 3); 485 | check_float_fn2(fix642float, -3, -3); 486 | 487 | check_float_fn2(fix642float, LARGE, 3); 488 | check_float_fn2(fix642float, LARGE, -3); 489 | check_float_fn2(fix642float, -LARGE, 3); 490 | check_float_fn2(fix642float, -LARGE, -3); 491 | 492 | check_float_fn1(uint642float, 3); 493 | check_float_fn1(uint642float, UINT32_MAX); 494 | check_float_fn1(uint642float, UINT64_MAX); 495 | check_float_fn1(uint642float, LARGE); 496 | 497 | check_float_fn1(float2double, 3.0f); 498 | check_float_fn1(float2double, 3.0e12f); 499 | check_float_fn1(float2double, -3.0e12f); 500 | check_float_fn1(float2double, -3.131592754123214125f); 501 | check_float_fn1(float2double, 3.131592754123214125f); 502 | check_float_fn1(float2double, 
-INFINITY); 503 | check_float_fn1(float2double, INFINITY); 504 | check_float_fn1(float2double, most_pos_denorm.f); 505 | check_float_fn1(float2double, least_pos_denorm.f); 506 | check_float_fn1(float2double, least_pos_norm.f); 507 | check_float_fn1(float2double, least_neg_denorm.f); 508 | check_float_fn1(float2double, least_neg_norm.f); 509 | check_float_fn1(float2double, most_neg_denorm.f); 510 | check_float_fn1(float2double, 0.f); 511 | check_float_fn1(float2double, -0.f); 512 | } 513 | 514 | if (rom_version >= 3) { 515 | for(float a = -0.3f; a<7.f; a += 0.137) { 516 | float s = mufp_funcs->mufp_fsin(a); 517 | float c = mufp_funcs->mufp_fcos(a); 518 | float co=0; 519 | float so = call_fsincos(a, &co, mufp_funcs->v3_mufp_fsincos); 520 | ASSERT(so == s && co == c); 521 | } 522 | } 523 | printf("FLOAT OK\n"); 524 | return 0; 525 | } 526 | -------------------------------------------------------------------------------- /test/tictoc.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2020 Raspberry Pi (Trading) Ltd. 3 | * 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | */ 6 | 7 | static inline void tictoc_init() { 8 | *(volatile unsigned int *)0xe000e010=5; // enable SYSTICK at core clock 9 | } 10 | 11 | static inline unsigned int cyc() { 12 | return (~*(volatile unsigned int *)0xe000e018)<<8; 13 | } 14 | 15 | 16 | -------------------------------------------------------------------------------- /usb_device_tiny/runtime.c: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2020 Raspberry Pi (Trading) Ltd. 
3 | * 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | */ 6 | 7 | #include "hardware/regs/m0plus.h" 8 | #include "hardware/structs/clocks.h" 9 | #include "hardware/structs/psm.h" 10 | #include "hardware/structs/sio.h" 11 | #include "hardware/structs/watchdog.h" 12 | #include "hardware/resets.h" 13 | 14 | #include "runtime.h" 15 | 16 | #ifdef USE_BOOTROM_GPIO 17 | 18 | #include "hardware/regs/pads_bank0.h" 19 | #include "hardware/structs/iobank0.h" 20 | #include "hardware/structs/sio.h" 21 | 22 | #endif 23 | 24 | #ifdef USE_16BIT_ROM_FUNCS 25 | asm ( 26 | ".global _rom_functions\n" 27 | "_rom_functions:\n" 28 | ".hword _dead + 1\n" // should not be called 29 | ".hword impl_usb_transfer_current_packet_only + 1\n" 30 | #ifdef USE_PICOBOOT 31 | ".hword impl_picoboot_cmd_packet + 1\n" 32 | #else 33 | ".hword _dead + 1\n" // should not be called 34 | #endif 35 | ".hword impl_msc_cmd_packet + 1\n" 36 | ".hword impl_usb_stream_packet_packet_handler + 1\n" 37 | ".hword impl_msc_on_sector_stream_chunk + 1\n" 38 | #ifdef USE_PICOBOOT 39 | ".hword impl_picoboot_on_stream_chunk + 1\n" 40 | #endif 41 | ); 42 | #endif 43 | 44 | void memset0(void *dest, uint n) { 45 | extern void __memset(void *dest, int c, uint n); 46 | __memset(dest, 0, n); 47 | } 48 | 49 | volatile bool rebooting; 50 | 51 | // note this is always false in USB_BOOT_EXPANDED_RUNTIME but that is ok 52 | bool watchdog_rebooting() { 53 | return rebooting; 54 | } 55 | 56 | #ifndef USB_BOOT_EXPANDED_RUNTIME 57 | 58 | void interrupt_enable(uint irq, bool enable) { 59 | assert(irq < N_IRQS); 60 | if (enable) { 61 | // Clear pending before enable 62 | // (if IRQ is actually asserted, it will immediately re-pend) 63 | *(volatile uint32_t *) (PPB_BASE + M0PLUS_NVIC_ICPR_OFFSET) = 1u << irq; 64 | *(volatile uint32_t *) (PPB_BASE + M0PLUS_NVIC_ISER_OFFSET) = 1u << irq; 65 | } else { 66 | *(volatile uint32_t *) (PPB_BASE + M0PLUS_NVIC_ICER_OFFSET) = 1u << irq; 67 | } 68 | } 69 | 70 | void watchdog_reboot(uint32_t pc, uint32_t 
sp, uint32_t delay_ms) { 71 | check_hw_layout(watchdog_hw_t, scratch[7], WATCHDOG_SCRATCH7_OFFSET); 72 | // Set power regs such that everything is reset by the watchdog 73 | // except ROSC, XOSC, and RUN_DEBOUNCED (resets the glitchless mux in clkslice). 74 | // One of which will be the watchdog tick 75 | *((volatile uint32_t *) (PSM_BASE + PSM_WDSEL_OFFSET)) = 76 | PSM_WDSEL_BITS & ~(PSM_WDSEL_ROSC_BITS | PSM_WDSEL_XOSC_BITS); 77 | hw_clear_bits(&watchdog_hw->ctrl, WATCHDOG_CTRL_ENABLE_BITS | WATCHDOG_CTRL_PAUSE_DBG0_BITS | WATCHDOG_CTRL_PAUSE_DBG1_BITS | 78 | WATCHDOG_CTRL_PAUSE_JTAG_BITS); // want to reboot even under debugger 79 | if (pc) { 80 | pc |= 1u; // thumb mode 81 | watchdog_hw->scratch[4] = 0xb007c0d3; 82 | watchdog_hw->scratch[5] = pc ^ -watchdog_hw->scratch[4]; 83 | watchdog_hw->scratch[6] = sp; 84 | watchdog_hw->scratch[7] = pc; 85 | } else { 86 | watchdog_hw->scratch[4] = 0; 87 | } 88 | watchdog_hw->load = delay_ms * 1000; 89 | // Switch clk_ref to crystal, then ensure watchdog is ticking with a 90 | // divisor of 12. For USB we require a 12 MHz crystal. 
91 | if (!running_on_fpga()) { 92 | // Note clock mux not present on FPGA (clk_ref hardwired to 12 MHz) 93 | hw_write_masked(&clocks_hw->clk[clk_ref].ctrl, CLOCKS_CLK_REF_CTRL_SRC_VALUE_XOSC_CLKSRC, 94 | CLOCKS_CLK_REF_CTRL_SRC_BITS); 95 | while (!(clocks_hw->clk[clk_ref].selected & (1u << CLOCKS_CLK_REF_CTRL_SRC_VALUE_XOSC_CLKSRC))); 96 | } 97 | watchdog_hw->tick = 12u << WATCHDOG_TICK_CYCLES_LSB; 98 | hw_set_bits(&watchdog_hw->tick, WATCHDOG_TICK_ENABLE_BITS); 99 | hw_set_bits(&watchdog_hw->ctrl, WATCHDOG_CTRL_ENABLE_BITS); 100 | rebooting = true; 101 | } 102 | 103 | #endif 104 | 105 | #ifdef USE_BOOTROM_GPIO 106 | 107 | #ifdef USB_BOOT_EXPANDED_RUNTIME 108 | #include "gpio.h" 109 | #else 110 | 111 | static inline io_rw_32 *GPIO_CTRL_REG(uint x) { 112 | uint32_t offset = 0; 113 | 114 | // removed since we know it is in range 115 | // if (x >= 0 && x < 32) 116 | offset = IO_BANK0_BASE + (x * 8) + 4; 117 | 118 | return (io_rw_32 *) offset; 119 | } 120 | 121 | static inline io_rw_32 *PAD_CTRL_REG(uint x) { 122 | uint32_t offset = 0; 123 | 124 | // removed since we know it is in range 125 | // if (x >= 0 && x < 32) 126 | offset = PADS_BANK0_BASE + PADS_BANK0_GPIO0_OFFSET + (x * 4); 127 | 128 | return (io_rw_32 *) offset; 129 | } 130 | 131 | void gpio_funcsel(uint i, int fn) { 132 | io_rw_32 *pad_ctl = PAD_CTRL_REG(i); 133 | // we are only enabling output, so just clear the OD 134 | hw_clear_bits(pad_ctl, PADS_BANK0_GPIO0_OD_BITS); 135 | // Set the funcsel 136 | *GPIO_CTRL_REG(i) = (fn << IO_BANK0_GPIO0_CTRL_FUNCSEL_LSB); 137 | } 138 | 139 | #endif 140 | 141 | uint32_t usb_activity_gpio_pin_mask; 142 | 143 | void gpio_setup() { 144 | if (usb_activity_gpio_pin_mask) { 145 | unreset_block(RESETS_RESET_IO_BANK0_BITS); 146 | sio_hw->gpio_set = usb_activity_gpio_pin_mask; 147 | sio_hw->gpio_oe_set = usb_activity_gpio_pin_mask; 148 | // need pin number rather than mask 149 | gpio_funcsel(ctz32(usb_activity_gpio_pin_mask), 5); 150 | } 151 | #ifndef NDEBUG 152 | // Set to RIO 
for debug 153 | for (int i = 19; i < 23; i++) { 154 | gpio_init(i); 155 | gpio_dir_out_mask(1 << i); 156 | } 157 | #endif 158 | } 159 | 160 | #endif 161 | -------------------------------------------------------------------------------- /usb_device_tiny/runtime.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2020 Raspberry Pi (Trading) Ltd. 3 | * 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | */ 6 | 7 | #ifndef _RUNTIME_H 8 | #define _RUNTIME_H 9 | 10 | #include "pico.h" 11 | 12 | #include "hardware/structs/sio.h" 13 | #include "hardware/structs/timer.h" 14 | 15 | #ifdef USE_PICOBOOT 16 | // allow usb_boot to not include all interfaces 17 | #define USB_BOOT_WITH_SUBSET_OF_INTERFACES 18 | // use a fixed number of interfaces to save code 19 | #define USB_FIXED_INTERFACE_COUNT 2 20 | #else 21 | #define USB_FIXED_INTERFACE_COUNT 1 22 | #endif 23 | 24 | #ifndef USE_16BIT_ROM_FUNCS 25 | #define __rom_function_ref(x) x 26 | #define __rom_function_impl(t,x) t x 27 | #define __rom_function_type(t) t 28 | #define __rom_function_deref(t, x) x 29 | #else 30 | #define __rom_function_ref(x) ((ROM_FUNC ## x)*2) 31 | #define __rom_function_impl(t, x) __used t impl ## x 32 | #define __rom_function_type(t) uint8_t 33 | 34 | #define __rom_function_deref_guts(x) ((uintptr_t)(*(uint16_t*)(_rom_functions + x))) 35 | #define __rom_function_deref(t, x) ((t)__rom_function_deref_guts(x)) 36 | 37 | #define ROM_FUNC_usb_transfer_current_packet_only 1 38 | #ifdef USE_PICOBOOT 39 | #define ROM_FUNC_picoboot_cmd_packet 2 40 | #endif 41 | #define ROM_FUNC_msc_cmd_packet 3 42 | #define ROM_FUNC_usb_stream_packet_packet_handler 4 43 | #define ROM_FUNC_msc_on_sector_stream_chunk 5 44 | #ifdef USE_PICOBOOT 45 | #define ROM_FUNC_picoboot_on_stream_chunk 6 46 | #endif 47 | 48 | extern uint8_t _rom_functions[]; 49 | #endif 50 | 51 | #define __rom_function_static_impl(t, x) /*static */__rom_function_impl(t, x) 52 | 53 | extern void 
poor_mans_text_decompress(const uint8_t *src, uint32_t size, uint8_t *dest); 54 | 55 | typedef unsigned int uint; 56 | 57 | #define count_of(a) (sizeof(a)/sizeof((a)[0])) 58 | 59 | extern void *__memcpy(void *dest, const void *src, uint n); 60 | // ROM version 61 | #define memcpy __memcpy 62 | extern void memset0(void *dest, uint count); 63 | void interrupt_enable(uint int_num, bool enable); 64 | void watchdog_reboot(uint32_t pc, uint32_t sp, uint32_t delay_ms); 65 | extern bool watchdog_rebooting(); 66 | 67 | #ifdef USE_BOOTROM_GPIO 68 | extern void gpio_setup(); 69 | // bit mask of pins for "disk activity" 70 | extern uint32_t usb_activity_gpio_pin_mask; 71 | #endif 72 | 73 | #ifndef USB_BOOT_EXPANDED_RUNTIME 74 | #define usb_trace(format, ...) ((void)0) 75 | #define usb_debug(format, ...) ((void)0) 76 | #define usb_warn(format, ...) ((void)0) 77 | #define usb_panic(format, ...) __breakpoint() 78 | #define uf2_debug(format, ...) ((void)0) 79 | #define printf(format, ...) ((void)0) 80 | #define puts(str) ((void)0) 81 | 82 | extern uint32_t ctz32(uint32_t x); 83 | 84 | #ifdef USE_BOOTROM_GPIO 85 | 86 | #define PIN_DBG1 19 87 | 88 | // note these two macros may only be used once per compilation unit 89 | #define CU_REGISTER_DEBUG_PINS(p...) 
enum __unused DEBUG_PIN_TYPE { _none = 0, p }; static enum DEBUG_PIN_TYPE __selected_debug_pins; 90 | #define CU_SELECT_DEBUG_PINS(x) static enum DEBUG_PIN_TYPE __selected_debug_pins = (x); 91 | 92 | // Drive high every GPIO appearing in mask 93 | static inline void gpio_set_mask(uint32_t mask) { 94 | *(volatile uint32_t *) (SIO_BASE + SIO_GPIO_OUT_SET_OFFSET) = mask; 95 | } 96 | 97 | // Drive low every GPIO appearing in mask 98 | static inline void gpio_clr_mask(uint32_t mask) { 99 | *(volatile uint32_t *) (SIO_BASE + SIO_GPIO_OUT_CLR_OFFSET) = mask; 100 | } 101 | 102 | // Toggle every GPIO appearing in mask 103 | static inline void gpio_xor_mask(uint32_t mask) { 104 | *(volatile uint32_t *) (SIO_BASE + SIO_GPIO_OUT_XOR_OFFSET) = mask; 105 | } 106 | 107 | #define DEBUG_PINS_ENABLED(p) (__selected_debug_pins == (p)) 108 | #define DEBUG_PINS_SET(p, v) if (DEBUG_PINS_ENABLED(p)) gpio_set_mask((unsigned)(v)<timerawl; 124 | } 125 | 126 | #else 127 | #include "debug.h" 128 | extern void uart_init(int i, int rate, int hwflow); 129 | extern void panic(const char *fmt, ...); 130 | extern int printf(const char *fmt, ...); 131 | extern int puts(const char *str); 132 | #define usb_panic(format,args...) panic(format, ## args) 133 | #define usb_warn(format,args...) ({printf("WARNING: "); printf(format, ## args); }) 134 | #if false && !defined(NDEBUG) 135 | #define usb_debug(format,args...) printf(format, ## args) 136 | #else 137 | #define usb_debug(format,...) ((void)0) 138 | #endif 139 | #if false && !defined(NDEBUG) 140 | #define usb_trace(format,args...) printf(format, ## args) 141 | #else 142 | #define usb_trace(format,...) ((void)0) 143 | #endif 144 | #if true && !defined(NDEBUG) 145 | #define uf2_debug(format,args...) printf(format, ## args) 146 | #else 147 | #define uf2_debug(format,...) 
((void)0) 148 | #endif 149 | #define ctz32 __builtin_ctz 150 | #endif 151 | #endif 152 | -------------------------------------------------------------------------------- /usb_device_tiny/scsi.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2020 Raspberry Pi (Trading) Ltd. 3 | * 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | */ 6 | 7 | #ifndef _SCSI_H 8 | #define _SCSI_H 9 | 10 | #include 11 | #include 12 | #include 13 | 14 | #define CBW_SIG 0x43425355 15 | struct __aligned(4) __packed scsi_cbw { 16 | uint32_t sig; 17 | uint32_t tag; 18 | uint32_t data_transfer_length; 19 | uint8_t flags; 20 | uint8_t lun; 21 | uint8_t cb_length; 22 | uint8_t cb[16]; 23 | }; 24 | 25 | #define CSW_SIG 0x53425355 26 | struct __packed scsi_csw { 27 | uint32_t sig; 28 | uint32_t tag; 29 | uint32_t residue; 30 | uint8_t status; 31 | }; 32 | 33 | struct __packed scsi_capacity { 34 | uint32_t lba; // last block addr 35 | uint32_t block_len; // probably 512 36 | }; 37 | 38 | struct __packed scsi_read_cb { 39 | uint8_t opcode; 40 | uint8_t flags; 41 | uint32_t lba; 42 | uint8_t reserved; 43 | uint16_t blocks; 44 | uint8_t control; 45 | }; 46 | 47 | enum csw_status { 48 | CSW_STATUS_COMMAND_PASSED = 0x00, 49 | CSW_STATUS_COMMAND_FAILED = 0x01, 50 | CSW_STATUS_PHASE_ERROR = 0x02, 51 | }; 52 | 53 | enum scsi_cmd { 54 | INQUIRY = 0x12, 55 | MODE_SELECT_6 = 0x15, 56 | MODE_SELECT_10 = 0x55, 57 | MODE_SENSE_6 = 0x1a, 58 | MODE_SENSE_10 = 0x5a, 59 | PREVENT_ALLOW_MEDIUM_REMOVAL = 0x1e, 60 | READ_6 = 0x08, 61 | READ_10 = 0x28, 62 | READ_12 = 0xa8, 63 | READ_FORMAT_CAPACITIES = 0x23, 64 | READ_CAPACITY_10 = 0x25, 65 | REPORT_LUNS = 0xa0, 66 | REQUEST_SENSE = 0x03, 67 | SEND_DIAGNOSTIC = 0x1d, 68 | START_STOP_UNIT = 0x1b, 69 | SYNCHRONIZE_CACHE = 0x35, 70 | TEST_UNIT_READY = 0x00, 71 | VERIFY = 0x2f, 72 | WRITE_6 = 0x0a, 73 | WRITE_10 = 0x2a, 74 | WRITE_12 = 0xaa, 75 | }; 76 | 77 | enum scsi_sense_key { 78 | SK_OK = 0x00, 79 | 
SK_NOT_READY = 0x02, 80 | SK_ILLEGAL_REQUEST = 0x05, 81 | SK_UNIT_ATTENTION = 0x06, 82 | SK_DATA_PROTECT = 0x07 83 | }; 84 | 85 | enum scsi_additional_sense_code { 86 | ASC_NONE = 0x00, 87 | ASC_INVALID_COMMAND_OPERATION_CODE = 0x20, 88 | ASC_PERIPHERAL_DEVICE_WRITE_FAULT = 0x03, 89 | ASC_ACCESS_DENIED = 0x20, 90 | ASC_LBA_OUT_OF_RANGE = 0x21, 91 | ASC_WRITE_PROTECTED = 0x27, 92 | ASC_NOT_READY_TO_READY_CHANGE = 0x28, 93 | ASC_MEDIUM_NOT_PRESENT = 0x3a, 94 | }; 95 | 96 | enum scsi_additional_sense_code_qualifier { 97 | ASCQ_NA = 0x00, 98 | }; 99 | #endif -------------------------------------------------------------------------------- /usb_device_tiny/scsi_ir.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2020 Raspberry Pi (Trading) Ltd. 3 | * 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | */ 6 | 7 | #ifndef _SCSI_IR_H 8 | #define _SCSI_IR_H 9 | 10 | // NOTE THIS IS IN A SEPARATE HEADER AS IT IS COMPRESSED WHEN USING COMPRESS_TEXT 11 | typedef unsigned char uint8_t; 12 | 13 | struct scsi_inquiry_response { 14 | uint8_t pdt; 15 | uint8_t rmb; 16 | uint8_t spc_version; 17 | uint8_t rdf; 18 | uint8_t additional_length; 19 | uint8_t inquiry5; 20 | uint8_t inquiry6; 21 | uint8_t inquiry7; 22 | char vendor[8]; 23 | char product[16]; 24 | char version[4]; 25 | } __packed; 26 | 27 | #ifndef COMPRESS_TEXT 28 | static const struct scsi_inquiry_response scsi_ir = { 29 | .rmb = 0x80, 30 | .spc_version = 2, 31 | .rdf = 2, 32 | .additional_length = sizeof(struct scsi_inquiry_response) - 4, 33 | .vendor = "RPI ", 34 | .product = "RP2 ", 35 | .version = "3 ", 36 | }; 37 | #endif 38 | #endif -------------------------------------------------------------------------------- /usb_device_tiny/usb_common.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020 Raspberry Pi (Trading) Ltd. 
3 | * 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | */ 6 | 7 | #ifndef _USB_USB_COMMON_H 8 | #define _USB_USB_COMMON_H 9 | 10 | #include "pico/types.h" 11 | #include "hardware/structs/usb.h" 12 | 13 | // bmRequestType bit definitions 14 | #define USB_REQ_TYPE_STANDARD 0x00u 15 | #define USB_REQ_TYPE_TYPE_MASK 0x60u 16 | #define USB_REQ_TYPE_TYPE_CLASS 0x20u 17 | #define USB_REQ_TYPE_TYPE_VENDOR 0x40u 18 | 19 | #define USB_REQ_TYPE_RECIPIENT_MASK 0x1fu 20 | #define USB_REQ_TYPE_RECIPIENT_DEVICE 0x00u 21 | #define USB_REQ_TYPE_RECIPIENT_INTERFACE 0x01u 22 | #define USB_REQ_TYPE_RECIPIENT_ENDPOINT 0x02u 23 | 24 | #define USB_DIR_OUT 0x00u 25 | #define USB_DIR_IN 0x80u 26 | 27 | #define USB_TRANSFER_TYPE_CONTROL 0x0 28 | #define USB_TRANSFER_TYPE_ISOCHRONOUS 0x1 29 | #define USB_TRANSFER_TYPE_BULK 0x2 30 | #define USB_TRANSFER_TYPE_INTERRUPT 0x3 31 | #define USB_TRANSFER_TYPE_BITS 0x3 32 | 33 | // Descriptor types 34 | #define USB_DT_DEVICE 0x01 35 | #define USB_DT_CONFIG 0x02 36 | #define USB_DT_STRING 0x03 37 | #define USB_DT_INTERFACE 0x04 38 | #define USB_DT_ENDPOINT 0x05 39 | 40 | #define USB_REQUEST_GET_STATUS 0x0 41 | #define USB_REQUEST_CLEAR_FEATURE 0x01 42 | #define USB_REQUEST_SET_FEATURE 0x03 43 | #define USB_REQUEST_SET_ADDRESS 0x05 44 | #define USB_REQUEST_GET_DESCRIPTOR 0x06 45 | #define USB_REQUEST_SET_DESCRIPTOR 0x07 46 | #define USB_REQUEST_GET_CONFIGURATION 0x08 47 | #define USB_REQUEST_SET_CONFIGURATION 0x09 48 | #define USB_REQUEST_GET_INTERFACE 0x0a 49 | #define USB_REQUEST_SET_INTERFACE 0x0b 50 | #define USB_REQUEST_SYNC_FRAME 0x0c 51 | 52 | #define USB_REQUEST_MSC_GET_MAX_LUN 0xfe 53 | #define USB_REQUEST_MSC_RESET 0xff 54 | 55 | #define USB_FEAT_ENDPOINT_HALT 0x00 56 | #define USB_FEAT_DEVICE_REMOTE_WAKEUP 0x01 57 | #define USB_FEAT_TEST_MODE 0x02 58 | 59 | #define USB_DESCRIPTOR_TYPE_ENDPOINT 0x05 60 | 61 | struct usb_setup_packet { 62 | uint8_t bmRequestType; 63 | uint8_t bRequest; 64 | uint16_t wValue; 65 | uint16_t wIndex; 66 | uint16_t 
wLength; 67 | } __packed; 68 | 69 | struct usb_descriptor { 70 | uint8_t bLength; 71 | uint8_t bDescriptorType; 72 | }; 73 | 74 | struct usb_device_descriptor { 75 | uint8_t bLength; 76 | uint8_t bDescriptorType; 77 | uint16_t bcdUSB; 78 | uint8_t bDeviceClass; 79 | uint8_t bDeviceSubClass; 80 | uint8_t bDeviceProtocol; 81 | uint8_t bMaxPacketSize0; 82 | uint16_t idVendor; 83 | uint16_t idProduct; 84 | uint16_t bcdDevice; 85 | uint8_t iManufacturer; 86 | uint8_t iProduct; 87 | uint8_t iSerialNumber; 88 | uint8_t bNumConfigurations; 89 | } __packed; 90 | 91 | struct usb_configuration_descriptor { 92 | uint8_t bLength; 93 | uint8_t bDescriptorType; 94 | uint16_t wTotalLength; 95 | uint8_t bNumInterfaces; 96 | uint8_t bConfigurationValue; 97 | uint8_t iConfiguration; 98 | uint8_t bmAttributes; 99 | uint8_t bMaxPower; 100 | } __packed; 101 | 102 | struct usb_interface_descriptor { 103 | uint8_t bLength; 104 | uint8_t bDescriptorType; 105 | uint8_t bInterfaceNumber; 106 | uint8_t bAlternateSetting; 107 | uint8_t bNumEndpoints; 108 | uint8_t bInterfaceClass; 109 | uint8_t bInterfaceSubClass; 110 | uint8_t bInterfaceProtocol; 111 | uint8_t iInterface; 112 | } __packed; 113 | 114 | struct usb_endpoint_descriptor { 115 | uint8_t bLength; 116 | uint8_t bDescriptorType; 117 | uint8_t bEndpointAddress; 118 | uint8_t bmAttributes; 119 | uint16_t wMaxPacketSize; 120 | uint8_t bInterval; 121 | } __packed; 122 | 123 | struct usb_endpoint_descriptor_long { 124 | uint8_t bLength; 125 | uint8_t bDescriptorType; 126 | uint8_t bEndpointAddress; 127 | uint8_t bmAttributes; 128 | uint16_t wMaxPacketSize; 129 | uint8_t bInterval; 130 | uint8_t bRefresh; 131 | uint8_t bSyncAddr; 132 | } __attribute__((packed)); 133 | 134 | #endif -------------------------------------------------------------------------------- /usb_device_tiny/usb_device.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2020 Raspberry Pi (Trading) Ltd. 
3 | * 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | */ 6 | 7 | #ifndef _USB_DEVICE_H 8 | #define _USB_DEVICE_H 9 | 10 | #include "usb_common.h" 11 | #include "runtime.h" 12 | 13 | // ------------------------------------------------------------------------------------ 14 | // In order to save space in bootrom, remove unused features, or simplify some 15 | // abstractions that are not required, whilst leaving the code intent intact 16 | // ------------------------------------------------------------------------------------ 17 | 18 | // GENERAL_SIZE_HACKS (set in build) size hacks (e.g. asm) that are valid in bootrom and usb_device_test 19 | // BOOTROM_ONLY_SIZE_HACK (set in build) size hacks that require this to be actual bootrom (e.g. 16 bit pointers) 20 | 21 | #define USB_ASSUME_ZERO_INIT 22 | 23 | #ifndef USB_BOOT_WITH_SUBSET_OF_INTERFACES 24 | #define USB_ASSUME_ENDPOINTS_ARRAY_FULL 25 | #endif 26 | 27 | // no custom per device setup packet handler 28 | #define USB_NO_DEVICE_SETUP_HANDLER 29 | 30 | // since our device has on_configure, require it to save a null test 31 | #define USB_MUST_HAVE_DEVICE_ON_CONFIGURE 32 | 33 | // since all our non 0 endpoints are bulk, require that to allow compile time constants 34 | #define USB_BULK_ONLY_EP1_THRU_16 35 | 36 | // our interfaces are zero-based, numbered in the order they appear on the device - require that 37 | #define USB_ZERO_BASED_INTERFACES 38 | 39 | // no custom per endpoint setup packet handlers 40 | #define USB_NO_ENDPOINT_SETUP_HANDLER 41 | 42 | // no on_init method for transfer 43 | #define USB_NO_TRANSFER_ON_INIT 44 | 45 | // no on_cancel method for transfer 46 | #define USB_NO_TRANSFER_ON_CANCEL 47 | 48 | // don't store the endpoints on the interface, and as a result 49 | // just do some endpoint level init during the endpoint initializers 50 | #define USB_NO_INTERFACE_ENDPOINTS_MEMBER 51 | 52 | #define USB_NO_INTERFACE_ALTERNATES 53 | 54 | #ifndef USB_ISOCHRONOUS_BUFFER_STRIDE_TYPE 55 | #define 
USB_ISOCHRONOUS_BUFFER_STRIDE_TYPE 0 56 | #endif 57 | 58 | static_assert(USB_ISOCHRONOUS_BUFFER_STRIDE_TYPE >= 0 && USB_ISOCHRONOUS_BUFFER_STRIDE_TYPE < 4, ""); 59 | 60 | // don't zero out most structures (since we do so globally for BSS) 61 | #define USB_SKIP_COMMON_INIT 62 | 63 | // only 16 bytes saved to not set a sense code 64 | //#define USB_SILENT_FAIL_ON_EXCLUSIVE 65 | 66 | #ifndef NDEBUG 67 | #define __removed_for_space(x) x 68 | #define __comma_removed_for_space(x) ,x 69 | #else 70 | #define __removed_for_space(x) 71 | #define __comma_removed_for_space(x) 72 | #endif 73 | struct usb_transfer; 74 | struct usb_endpoint; 75 | 76 | typedef void (*usb_transfer_func)(struct usb_endpoint *ep); 77 | typedef void (*usb_transfer_completed_func)(struct usb_endpoint *ep, struct usb_transfer *transfer); 78 | 79 | struct usb_buffer { 80 | uint8_t *data; 81 | uint8_t data_len; 82 | uint8_t data_max; 83 | // then... 84 | bool valid; // aka user owned 85 | }; 86 | 87 | #include "usb_device_private.h" 88 | 89 | struct usb_transfer_type { 90 | // for IN transfers this is called to set up new packet buffers 91 | // for OUT transfers this is called with packet data 92 | // 93 | // In any case usb_packet_done must be called if this function has handled the buffer 94 | __rom_function_type(usb_transfer_func) on_packet; 95 | #ifndef USB_NO_TRANSFER_ON_INIT 96 | usb_transfer_func on_init; 97 | #endif 98 | #ifndef USB_NO_TRANSFER_ON_CANCEL 99 | usb_transfer_func on_cancel; 100 | #endif 101 | uint8_t initial_packet_count; 102 | }; 103 | 104 | struct usb_interface *usb_interface_init(struct usb_interface *interface, const struct usb_interface_descriptor *desc, 105 | struct usb_endpoint *const *endpoints, uint endpoint_count, 106 | bool double_buffered); 107 | struct usb_device *usb_device_init(const struct usb_device_descriptor *desc, 108 | const struct usb_configuration_descriptor *config_desc, 109 | struct usb_interface *const *interfaces, uint interface_count, 110 | const char 
*(*get_descriptor_string)(uint index)); 111 | 112 | void usb_device_start(); 113 | void usb_device_stop(); 114 | 115 | // explicit stall 116 | void usb_halt_endpoint_on_condition(struct usb_endpoint *ep); 117 | void usb_halt_endpoint(struct usb_endpoint *endpoint); 118 | void usb_clear_halt_condition(struct usb_endpoint *ep); 119 | static inline bool usb_is_endpoint_stalled(struct usb_endpoint *endpoint); 120 | void usb_set_default_transfer(struct usb_endpoint *ep, struct usb_transfer *transfer); 121 | void usb_reset_transfer(struct usb_transfer *transfer, const struct usb_transfer_type *type, 122 | usb_transfer_completed_func on_complete); 123 | void usb_start_transfer(struct usb_endpoint *ep, struct usb_transfer *transfer); 124 | void usb_reset_and_start_transfer(struct usb_endpoint *ep, struct usb_transfer *transfer, 125 | const struct usb_transfer_type *type, usb_transfer_completed_func on_complete); 126 | void usb_chain_transfer(struct usb_endpoint *ep, struct usb_transfer *transfer); 127 | void usb_grow_transfer(struct usb_transfer *transfer, uint packet_count); 128 | void usb_start_default_transfer_if_not_already_running_or_halted(struct usb_endpoint *ep); 129 | 130 | typedef void (*usb_transfer_func)(struct usb_endpoint *ep); 131 | 132 | struct usb_buffer *usb_current_in_packet_buffer(struct usb_endpoint *ep); 133 | struct usb_buffer *usb_current_out_packet_buffer(struct usb_endpoint *ep); 134 | uint8_t *usb_get_single_packet_response_buffer(struct usb_endpoint *ep, uint len); 135 | 136 | // call during (or asynchronously after) on_packet to mark the packet as done 137 | void usb_packet_done(struct usb_endpoint *ep); 138 | 139 | extern const struct usb_transfer_type usb_current_packet_only_transfer_type; 140 | 141 | static inline struct usb_endpoint *usb_get_control_in_endpoint(); 142 | static inline struct usb_endpoint *usb_get_control_out_endpoint(); 143 | 144 | void usb_start_empty_control_in_transfer(usb_transfer_completed_func on_complete); 145 | void 
usb_start_empty_control_in_transfer_null_completion(); 146 | void usb_start_tiny_control_in_transfer(uint32_t data, uint len); 147 | void usb_start_single_buffer_control_in_transfer(); 148 | void usb_start_control_out_transfer(const struct usb_transfer_type *type); 149 | void usb_start_empty_transfer(struct usb_endpoint *endpoint, struct usb_transfer *transfer, 150 | usb_transfer_completed_func on_complete); 151 | 152 | void usb_soft_reset_endpoint(struct usb_endpoint *ep); 153 | void usb_hard_reset_endpoint(struct usb_endpoint *ep); 154 | 155 | #ifdef ENABLE_DEBUG_TRACE 156 | void usb_dump_trace(void); 157 | void usb_reset_trace(void); 158 | #else 159 | 160 | static inline void usb_dump_trace() {} 161 | 162 | static inline void usb_reset_trace() {} 163 | 164 | #endif 165 | 166 | #endif -------------------------------------------------------------------------------- /usb_device_tiny/usb_device_private.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2020 Raspberry Pi (Trading) Ltd. 
3 | * 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | */ 6 | 7 | #ifndef _USB_DEVICE_PRIVATE_H 8 | #define _USB_DEVICE_PRIVATE_H 9 | 10 | #include "usb_common.h" 11 | 12 | struct usb_transfer { 13 | // prototype 14 | const struct usb_transfer_type *type; 15 | usb_transfer_completed_func on_complete; 16 | // total number of buffers (packets) that still need to be handed over to the hardware 17 | // during the remaining course of the transfer (with data for IN, empty for data for out) 18 | uint32_t remaining_packets_to_submit; 19 | // total number of buffers when we will expect to receive IRQ/handle_buffer for during 20 | // the remaining course of the transfer 21 | uint32_t remaining_packets_to_handle; 22 | #ifdef GENERAL_SIZE_HACKS 23 | union { 24 | struct { 25 | #endif 26 | // number of packets which require usb_packet_done() 27 | bool outstanding_packet; 28 | // received a packet which we couldn't deliver because there was one outstanding 29 | bool packet_queued; 30 | bool started; 31 | bool completed; 32 | #ifdef GENERAL_SIZE_HACKS 33 | }; 34 | uint32_t all_flags; 35 | }; 36 | #endif 37 | }; 38 | 39 | struct usb_interface { 40 | const struct usb_interface_descriptor *descriptor; 41 | #ifndef USB_NO_INTERFACE_ENDPOINTS_MEMBER 42 | struct usb_endpoint *const *endpoints; 43 | uint8_t endpoint_count; 44 | #endif 45 | bool (*setup_request_handler)(struct usb_interface *interface, struct usb_setup_packet *setup); 46 | #ifndef USB_NO_INTERFACE_ALTERNATES 47 | bool (*set_alternate_handler)(struct usb_interface *interface, uint alt); 48 | uint8_t alt; 49 | #endif 50 | }; 51 | 52 | struct usb_configuration { 53 | const struct usb_configuration_descriptor *descriptor; 54 | struct usb_interface *const *interfaces; 55 | #ifndef USB_FIXED_INTERFACE_COUNT 56 | uint8_t interface_count; 57 | #endif 58 | }; 59 | #ifdef USB_FIXED_INTERFACE_COUNT 60 | #define _usb_interface_count(config) USB_FIXED_INTERFACE_COUNT 61 | #else 62 | #define _usb_interface_count(config) 
config->interface_count 63 | #endif 64 | 65 | struct usb_device { 66 | const struct usb_device_descriptor *descriptor; 67 | #ifndef USB_NO_DEVICE_SETUP_HANDLER 68 | bool (*setup_request_handler)(struct usb_device *dev, struct usb_setup_packet *setup); 69 | #endif 70 | void (*on_configure)(struct usb_device *dev, bool configured); 71 | const char *(*get_descriptor_string)(uint index); 72 | // only support one config for now 73 | struct usb_configuration config; 74 | uint8_t current_address; // 0 if unaddressed 75 | uint8_t current_config_num; // 0 if unconfigured 76 | uint8_t pending_address; // address to set on completion of SET_ADDRESS CSW 77 | uint16_t next_buffer_offset; 78 | // bool started; 79 | }; 80 | 81 | enum usb_halt_state { 82 | HS_NONE = 0, 83 | HS_NON_HALT_STALL = 1, // just stalled 84 | HS_HALTED = 2, // halted or command halted 85 | HS_HALTED_ON_CONDITION = 3 // halted that cannot be simply cleared by CLEAR_FEATURE 86 | }; 87 | 88 | struct usb_endpoint { 89 | const struct usb_endpoint_descriptor *descriptor; 90 | struct usb_transfer *default_transfer; 91 | struct usb_transfer *current_transfer; 92 | struct usb_transfer *chain_transfer; 93 | void (*on_stall_change)(struct usb_endpoint *ep); 94 | #ifndef USB_NO_ENDPOINT_SETUP_HANDLER 95 | bool (*setup_request_handler)(struct usb_endpoint *ep, struct usb_setup_packet *setup); 96 | #endif 97 | uint16_t dpram_buffer_offset; 98 | uint16_t buffer_size; // for an individual buffer 99 | struct usb_buffer current_hw_buffer; 100 | #ifndef USB_BULK_ONLY_EP1_THRU_16 101 | uint16_t buffer_stride; 102 | #endif 103 | uint8_t num; 104 | uint8_t next_pid; 105 | uint8_t buffer_bit_index; 106 | uint8_t owned_buffer_count; 107 | uint8_t current_give_buffer; 108 | uint8_t current_take_buffer; 109 | uint8_t halt_state; 110 | bool first_buffer_after_reset; 111 | bool double_buffered; 112 | bool in; 113 | }; 114 | 115 | static inline uint usb_endpoint_number(struct usb_endpoint *ep) { 116 | assert(ep); 117 | return 
ep->descriptor ? ep->descriptor->bEndpointAddress & 0xfu : 0; 118 | } 119 | 120 | static inline bool usb_is_endpoint_stalled(struct usb_endpoint *endpoint) { 121 | return endpoint->halt_state != HS_NONE; 122 | } 123 | 124 | const char *usb_endpoint_dir_string(struct usb_endpoint *ep); 125 | 126 | static inline struct usb_endpoint *usb_get_control_in_endpoint() { 127 | extern struct usb_endpoint usb_control_in; 128 | return &usb_control_in; 129 | } 130 | 131 | static inline struct usb_endpoint *usb_get_control_out_endpoint() { 132 | extern struct usb_endpoint usb_control_out; 133 | return &usb_control_out; 134 | } 135 | 136 | #endif //_USB_DEVICE_PRIVATE_H 137 | -------------------------------------------------------------------------------- /usb_device_tiny/usb_msc.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2020 Raspberry Pi (Trading) Ltd. 3 | * 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | */ 6 | 7 | #ifndef _USB_MSC_H 8 | #define _USB_MSC_H 9 | 10 | #include "runtime.h" 11 | 12 | #define SECTOR_SIZE 512u 13 | 14 | bool msc_setup_request_handler(struct usb_interface *interface, struct usb_setup_packet *setup); 15 | void msc_on_configure(__unused struct usb_device *device, bool configured); 16 | //struct usb_endpoint msc_in, msc_out; 17 | extern struct usb_endpoint msc_endpoints[2]; 18 | 19 | // provided by the hosting code 20 | uint32_t msc_get_serial_number32(); 21 | void msc_eject(); 22 | 23 | #endif -------------------------------------------------------------------------------- /usb_device_tiny/usb_stream_helper.c: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2020 Raspberry Pi (Trading) Ltd. 
3 | * 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | */ 6 | 7 | #include "usb_stream_helper.h" 8 | #include "runtime.h" 9 | 10 | static uint32_t _usb_stream_chunk_offset(struct usb_stream_transfer *transfer) { 11 | return transfer->offset & (transfer->chunk_size - 1); 12 | } 13 | 14 | void usb_stream_packet_handler_complete(struct usb_stream_transfer *transfer) { 15 | struct usb_buffer *buffer; 16 | struct usb_endpoint *ep = transfer->ep; 17 | #ifndef NDEBUG 18 | assert(transfer->packet_handler_complete_expected); 19 | transfer->packet_handler_complete_expected = false; 20 | #endif 21 | assert(ep); 22 | if (ep->in) { 23 | buffer = usb_current_in_packet_buffer(ep); 24 | assert(buffer); 25 | assert(buffer->data_max == 64); 26 | uint chunk_offset = _usb_stream_chunk_offset(transfer); 27 | uint data_len = 64; 28 | if (transfer->offset + 64 > transfer->transfer_length) { 29 | data_len = transfer->transfer_length - transfer->offset; 30 | } 31 | buffer->data_len = data_len; 32 | memcpy(buffer->data, transfer->chunk_buffer + chunk_offset, data_len); 33 | } else { 34 | buffer = usb_current_out_packet_buffer(ep); 35 | assert(buffer); 36 | assert(buffer->data_len); 37 | } 38 | transfer->offset += buffer->data_len; 39 | if (ep->num > 2) usb_debug(" %d transfer_offset %d\n", ep->num, (uint) transfer->offset); 40 | assert(transfer->funcs && transfer->funcs->on_packet_complete); 41 | transfer->funcs->on_packet_complete(transfer); 42 | #ifdef USE_BOOTROM_GPIO 43 | gpio_clr_mask(usb_activity_gpio_pin_mask); 44 | #endif 45 | usb_packet_done(ep); 46 | } 47 | 48 | void usb_stream_chunk_done(struct usb_stream_transfer *transfer) { 49 | usb_stream_packet_handler_complete(transfer); 50 | } 51 | 52 | __rom_function_static_impl(void, _usb_stream_packet_packet_handler)(struct usb_endpoint *ep) { 53 | #ifdef USE_BOOTROM_GPIO 54 | gpio_set_mask(usb_activity_gpio_pin_mask); 55 | #endif 56 | // todo assert type 57 | struct usb_stream_transfer *transfer = (struct usb_stream_transfer *) 
ep->current_transfer; 58 | uint chunk_offset = _usb_stream_chunk_offset(transfer); 59 | uint chunk_len = 0; // set to non zero to call on_chunk 60 | if (ep->in) { 61 | if (!chunk_offset) { 62 | // we are at the beginning of a chunk so want to call on_chunk 63 | chunk_len = (transfer->offset + transfer->chunk_size) > transfer->transfer_length ? 64 | transfer->transfer_length - transfer->offset : transfer->chunk_size; 65 | if (ep->num > 2) 66 | usb_warn("chunko %d len %05x offset %08x size %04x transfer %08x\n", ep->num, chunk_len, chunk_offset, 67 | (uint) transfer->chunk_size, (uint) transfer->transfer_length); 68 | } 69 | } else { 70 | // usb_debug("write packet %04x %d\n", (uint)transfer->offset, ep->current_take_buffer); 71 | struct usb_buffer *buffer = usb_current_out_packet_buffer(ep); 72 | assert(buffer); 73 | // note we only set chunk_len if this is the end of a chunk 74 | if (transfer->offset + 64 >= transfer->transfer_length) { 75 | // we have ended the transfer (possibly mid-chunk) 76 | chunk_len = transfer->transfer_length & (transfer->chunk_size - 1); 77 | if (chunk_len) { 78 | usb_warn(">> Truncated %08x\n", chunk_len); 79 | } else { 80 | chunk_len = transfer->chunk_size; 81 | } 82 | } else if (chunk_offset + 64 >= transfer->chunk_size) { 83 | // end of regular chunk 84 | chunk_len = transfer->chunk_size; 85 | } 86 | assert(chunk_len || buffer->data_len == 64); 87 | // if (!(!chunk_len || buffer->data_len == ((chunk_len & 63u) ? (chunk_len & 63u) : 64u))) { 88 | // usb_warn("ooh off=%08x len=%08x chunk_off=%04x chunk_len=%04x data_len=%04x\n", (uint)transfer->offset, (uint)transfer->transfer_length, chunk_offset, chunk_len, buffer->data_len); 89 | // } 90 | assert(!chunk_len || buffer->data_len == ((chunk_len & 63u) ? 
(chunk_len & 63u) : 64u)); 91 | // zero buffer when we start a new buffer, so that the chunk callback never sees data it shouldn't (for partial chunks) 92 | if (!chunk_offset) { 93 | memset0(transfer->chunk_buffer, transfer->chunk_size); 94 | } 95 | memcpy(transfer->chunk_buffer + chunk_offset, buffer->data, buffer->data_len); // always safe to copy all 96 | } 97 | #ifndef NDEBUG 98 | transfer->packet_handler_complete_expected = true; 99 | #endif 100 | 101 | // todo i think this is reasonable since 0 length chunk does nothing 102 | if (chunk_len) { 103 | assert(transfer->funcs && transfer->funcs->on_chunk); 104 | if (__rom_function_deref(stream_on_chunk_function, transfer->funcs->on_chunk)(chunk_len 105 | __comma_removed_for_space( 106 | transfer))) 107 | return; 108 | } 109 | usb_stream_packet_handler_complete(transfer); 110 | } 111 | 112 | static const struct usb_transfer_type _usb_stream_transfer_type = { 113 | .on_packet = __rom_function_ref(_usb_stream_packet_packet_handler) 114 | }; 115 | 116 | void usb_stream_setup_transfer(struct usb_stream_transfer *transfer, const struct usb_stream_transfer_funcs *funcs, 117 | uint8_t *chunk_buffer, uint32_t chunk_size, uint32_t transfer_length, 118 | usb_transfer_completed_func on_complete) { 119 | transfer->funcs = funcs; 120 | transfer->chunk_buffer = chunk_buffer; 121 | assert(!(chunk_size & 63u)); // buffer should be a multiple of USB packet buffer size 122 | transfer->chunk_size = chunk_size; 123 | transfer->offset = 0; 124 | // todo combine with residue? 
125 | transfer->transfer_length = transfer_length; 126 | usb_reset_transfer(&transfer->core, &_usb_stream_transfer_type, on_complete); 127 | usb_grow_transfer(&transfer->core, (transfer_length + 63) / 64); 128 | } 129 | 130 | #ifdef USB_BOOT_EXPANDED_RUNTIME 131 | void usb_stream_noop_on_packet_complete(__removed_for_space(__unused struct usb_stream_transfer *transfer)) {} 132 | bool usb_stream_noop_on_chunk(uint32_t size __comma_removed_for_space(__unused struct usb_stream_transfer *transfer)) { 133 | return false; 134 | } 135 | #endif 136 | 137 | 138 | 139 | -------------------------------------------------------------------------------- /usb_device_tiny/usb_stream_helper.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2020 Raspberry Pi (Trading) Ltd. 3 | * 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | */ 6 | 7 | #ifndef _USB_STREAM_HELPER_H 8 | #define _USB_STREAM_HELPER_H 9 | 10 | #include "usb_device.h" 11 | 12 | struct usb_stream_transfer_funcs; 13 | 14 | struct usb_stream_transfer { 15 | struct usb_transfer core; 16 | uint32_t offset; // offset within the stream 17 | uint32_t transfer_length; 18 | uint32_t chunk_size; 19 | uint8_t *chunk_buffer; 20 | struct usb_endpoint *ep; 21 | const struct usb_stream_transfer_funcs *funcs; 22 | #ifndef NDEBUG 23 | bool packet_handler_complete_expected; 24 | #endif 25 | }; 26 | 27 | typedef void (*stream_on_packet_complete_function)(__removed_for_space(struct usb_stream_transfer *transfer)); 28 | typedef bool (*stream_on_chunk_function)(uint32_t chunk_len 29 | __comma_removed_for_space(struct usb_stream_transfer *transfer)); 30 | 31 | struct usb_stream_transfer_funcs { 32 | stream_on_packet_complete_function on_packet_complete; 33 | // returns whether processing async 34 | __rom_function_type(stream_on_chunk_function) on_chunk; 35 | }; 36 | 37 | void usb_stream_setup_transfer(struct usb_stream_transfer *transfer, const struct usb_stream_transfer_funcs *funcs, 38 |
uint8_t *chunk_buffer, uint32_t chunk_size, uint32_t transfer_length, 39 | usb_transfer_completed_func on_complete); 40 | 41 | void usb_stream_chunk_done(struct usb_stream_transfer *transfer); 42 | 43 | #ifndef USB_BOOT_EXPANDED_RUNTIME 44 | extern void _noop(); 45 | #define usb_stream_noop_on_packet_complete ((stream_on_packet_complete_function)_noop) 46 | #define usb_stream_noop_on_chunk needs_work 47 | #else 48 | void usb_stream_noop_on_packet_complete(__removed_for_space(struct usb_stream_transfer *transfer)); 49 | bool usb_stream_noop_on_chunk(uint32_t chunk_len __comma_removed_for_space( struct usb_stream_transfer *transfer)); 50 | #endif 51 | #endif // _USB_STREAM_HELPER_H 52 | -------------------------------------------------------------------------------- /usb_device_tiny/virtual_disk.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2020 Raspberry Pi (Trading) Ltd. 3 | * 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | */ 6 | 7 | #ifndef _VIRTUAL_DISK_H 8 | #define _VIRTUAL_DISK_H 9 | 10 | #include "usb_msc.h" 11 | 12 | #define USE_INFO_UF2 13 | 14 | void vd_init(); 15 | void vd_reset(); 16 | 17 | // return true for async operation 18 | bool vd_read_block(uint32_t token, uint32_t lba, uint8_t *buf __comma_removed_for_space(uint32_t buf_size)); 19 | bool vd_write_block(uint32_t token, uint32_t lba, uint8_t *buf __comma_removed_for_space(uint32_t buf_size)); 20 | 21 | // give ourselves 16M which should strictly be the minimum for FAT16 - Note Win10 doesn't like FAT12 - go figure!
22 | // NOTE(review): an earlier note here said "upped to 64M which allows us to download a 32M UF2", but with CLUSTER_UP_SHIFT == 0 the VOLUME_SIZE below evaluates to 128M - confirm the intended size 23 | #define CLUSTER_UP_SHIFT 0u 24 | #define CLUSTER_UP_MUL (1u << CLUSTER_UP_SHIFT) 25 | #define VOLUME_SIZE (CLUSTER_UP_MUL * 128u * 1024u * 1024u) 26 | 27 | #define SECTOR_COUNT (VOLUME_SIZE / SECTOR_SIZE) 28 | 29 | #ifndef GENERAL_SIZE_HACKS 30 | static inline uint32_t vd_sector_count() { 31 | return SECTOR_COUNT; 32 | } 33 | #else 34 | // needs to be a compile time constant 35 | #define vd_sector_count() SECTOR_COUNT 36 | #endif 37 | 38 | void vd_async_complete(uint32_t token, uint32_t result); 39 | #endif 40 | --------------------------------------------------------------------------------