├── sce_sys ├── icon0.png └── livearea │ └── contents │ ├── bg.png │ ├── startup.png │ └── template.xml ├── .gitignore ├── common ├── debugScreen_custom.h ├── debugScreenFont.builder.html ├── debugScreen.h ├── debugScreenFont.c └── debugScreen.c ├── README.md ├── CMakeLists.txt ├── fetch_models.py ├── Makefile └── vita_llm.c /sce_sys/icon0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/callbacked/psvita-llm/HEAD/sce_sys/icon0.png -------------------------------------------------------------------------------- /sce_sys/livearea/contents/bg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/callbacked/psvita-llm/HEAD/sce_sys/livearea/contents/bg.png -------------------------------------------------------------------------------- /sce_sys/livearea/contents/startup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/callbacked/psvita-llm/HEAD/sce_sys/livearea/contents/startup.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | CMakeFiles/ 2 | CMakeCache.txt 3 | *.self 4 | *.self.out 5 | *.velf 6 | *.vpk_param.sfo 7 | *.vpk 8 | *.vpk.out 9 | *.png 10 | *.cmake 11 | psvita-llm 12 | /build 13 | .DS_Store 14 | 15 | -------------------------------------------------------------------------------- /sce_sys/livearea/contents/template.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | bg.png 6 | 7 | 8 | 9 | startup.png 10 | 11 | 12 | -------------------------------------------------------------------------------- /common/debugScreen_custom.h: -------------------------------------------------------------------------------- 1 | #ifndef DEBUG_SCREEN_CUSTOM_H 2 | #define DEBUG_SCREEN_CUSTOM_H 
3 | 4 | //#define SCREEN_TAB_SIZE (8) 5 | 6 | // backward compatibility for sources based on older Vita SDK versions 7 | //#define DEBUG_SCREEN_CODE_INCLUDE // not recommended for your own projects, but for sake of backward compatibility 8 | #define psvDebugScreenSetFgColor(rgb) psvDebugScreenPrintf("\e[38;2;%lu;%lu;%lum", ((uint32_t)(rgb)>>16)&0xFF, ((uint32_t)(rgb)>>8)&0xFF, (uint32_t)(rgb)&0xFF) 9 | #define psvDebugScreenSetBgColor(rgb) psvDebugScreenPrintf("\e[48;2;%lu;%lu;%lum", ((uint32_t)(rgb)>>16)&0xFF, ((uint32_t)(rgb)>>8)&0xFF, (uint32_t)(rgb)&0xFF) 10 | #define psvDebugScreenClear(rgb) psvDebugScreenSetBgColor(rgb); psvDebugScreenPuts("\e[H\e[2J") 11 | 12 | // custom changes for non-Vita builds 13 | #ifndef __vita__ 14 | #define psvDebugScreenInitReplacement(...) setvbuf(stdout,NULL,_IONBF,0) 15 | #endif 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /common/debugScreenFont.builder.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 11 | 41 | 42 | 43 | 44 | 45 | 46 | 47 |
48 | 49 | -------------------------------------------------------------------------------- /common/debugScreen.h: -------------------------------------------------------------------------------- 1 | #ifndef DEBUG_SCREEN_H 2 | #define DEBUG_SCREEN_H 3 | 4 | #include "debugScreen_custom.h" 5 | 6 | typedef struct ColorState { 7 | int fgTrueColorFlag; // flag if truecolors or ANSI/VTERM/GREYSCALE colors are used 8 | int bgTrueColorFlag; // flag if truecolors or ANSI/VTERM/GREYSCALE colors are used 9 | // truecolors 10 | uint32_t fgTrueColor; // color in RGB (internal BGR) 11 | uint32_t bgTrueColor; // color in RGB (internal BGR) 12 | // ANSI/VTERM/GREYSCALE colors 13 | unsigned char fgIndex; // ANSI/VTERM/GREYSCALE color code (0-255) 14 | unsigned char fgIntensity; // 22=normal, 1=increased ("bright"), 2=decreased ("dark") 15 | unsigned char bgIndex; // ANSI/VTERM/GREYSCALE color code (0-255) 16 | unsigned char bgIntensity; // 22=normal, 1=increased ("bright") 17 | int inversion; // flag if bg/fg colors are inverted 18 | 19 | // default colors (ANSI/VTERM/GREYSCALE) 20 | unsigned char fgIndexDefault; // default ANSI/VTERM/GREYSCALE color code 21 | unsigned char fgIntensityDefault; // 22=normal, 1=increased, 2=decreased 22 | unsigned char bgIndexDefault; // default ANSI/VTERM/GREYSCALE color code 23 | unsigned char bgIntensityDefault; // 22=normal, 1=increased 24 | int inversionDefault; // flag if bg/fg colors are inverted 25 | 26 | // current colors (e.g. 
inverted) 27 | uint32_t color_fg; // color in RGB (internal BGR) 28 | uint32_t color_bg; // color in RGB (internal BGR) 29 | } ColorState; 30 | 31 | typedef struct PsvDebugScreenFont { 32 | unsigned char *glyphs, width, height, first, last, size_w, size_h; // only values 0-255 33 | } PsvDebugScreenFont; 34 | 35 | #define SCREEN_WIDTH (960) // screen resolution x 36 | #define SCREEN_HEIGHT (544) // screen resolution y 37 | 38 | #ifdef DEBUG_SCREEN_CODE_INCLUDE // not recommended for your own projects, but for sake of backward compatibility 39 | #include "debugScreen.c" 40 | #else 41 | #ifdef __cplusplus 42 | extern "C" { 43 | #endif 44 | int psvDebugScreenInit(); 45 | int psvDebugScreenFinish(); 46 | int psvDebugScreenPuts(const char * _text); 47 | int psvDebugScreenPrintf(const char *format, ...); 48 | void psvDebugScreenGetColorStateCopy(ColorState *copy); 49 | void psvDebugScreenGetCoordsXY(int *x, int *y); 50 | void psvDebugScreenSetCoordsXY(int *x, int *y); 51 | PsvDebugScreenFont *psvDebugScreenGetFont(void); 52 | PsvDebugScreenFont *psvDebugScreenSetFont(PsvDebugScreenFont *font); 53 | PsvDebugScreenFont *psvDebugScreenScaleFont2x(PsvDebugScreenFont *source_font); 54 | #ifdef __cplusplus 55 | } 56 | #endif 57 | #endif 58 | 59 | #endif /* DEBUG_SCREEN_H */ 60 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PSVita-LLM 2 | 3 | > “Your scientists were so preoccupied with whether or not they could, they didn't stop to think if they should.” 4 | 5 | After blowing the dust off my PS Vita to play *LittleBigPlanet*, a stray question popped up: 6 | 7 | **Could it run an LLM?** 8 | 9 | Turns out it can! **PSVita-LLM** runs a modified version of `llama2.c` to load and infer the TinyStories 260K and 15M checkpoints right on the Vita. 
10 | 11 | 12 | | Model | Parameters | File size | Inference speed (PCH-1000 Overclocked @ 555 MHz) | Results | 13 | |-------|------------|-----------|---------------------------------------------------|---------| 14 | | TinyStories-260K | 0.26 M | 1 MB | ≈ 120 tok/s | | 15 | | TinyStories-15M | 15 M | 60 MB | ≈ 1.8 tok/s | | 16 | 17 | 18 | ## Features 19 | 20 | - **Interactive Model Selector:** On startup, the app will prompt the user to download a model if it detects that no models have been downloaded. 21 | - **Full "Game Loop":** After a story is generated, you can choose to generate another, return to the model selection screen to switch/manage models, or exit the app completely. 22 | 23 | ## How to Use 24 | 25 | 1. **Install the VPK:** Transfer the `psvita-llm.vpk` file to your Vita and install it using VitaShell. 26 | 2. **Download Models & Tokenizers:** Upon first boot, the program will list the models available to download. You can delete models and download any other model after that in the "Manage local models.." menu. 27 | 3. **Enjoy!** 28 | 29 | 30 | ## Building from Source 31 | 32 | To build this project yourself, you will need a working [VitaSDK](https://vitasdk.org/) installation. 33 | 34 | Once the SDK is configured, clone the repository and run: 35 | 36 | ```bash 37 | cmake . 38 | ``` 39 | 40 | This project would not be possible without Andrej Karpathy's foundational work on `llama2.c`. What was done was more of a port job if anything, but a great way to learn! 41 | 42 | ### Future Improvements 43 | 44 | Some notes that I want to leave in before I forget 45 | 46 | * **Breaking up some code:** Having everything in a single file like the original llama2.c file is pretty cool, but I should have separated the networking code, as I feel like that could be used in other projects that involve downloading stuff/doing curl calls on the internet; it'd be a good reference to have. 
47 | 48 | * **Multithreading:** The current code has commented-out `#pragma omp` directives. This is because OpenMP does not play nicely with the Vita's CPU. Leaving it on led to crashes upon token generation. A significant performance boost could probably be seen by implementing a native multithreading solution using the `SceThreadMgr` library in the SDK (?), especially for parallelizing the `matmul` ops in the transformer's forward pass. **For now this is all single-threaded**. 49 | 50 | But I'm out of my depth in terms of development with the SDK, though it is something worth considering should I give this project another look. 51 | 52 | 53 | 54 | 55 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | ## This file is a quick tutorial on writing CMakeLists for targeting the Vita 2 | cmake_minimum_required(VERSION 3.16) 3 | 4 | ## This includes the Vita toolchain, must go before project definition 5 | # It is a convenience so you do not have to type 6 | # -DCMAKE_TOOLCHAIN_FILE=$VITASDK/share/vita.toolchain.cmake for cmake. It is 7 | # highly recommended that you include this block for all projects. 8 | if(NOT DEFINED CMAKE_TOOLCHAIN_FILE) 9 | if(DEFINED ENV{VITASDK}) 10 | set(CMAKE_TOOLCHAIN_FILE "$ENV{VITASDK}/share/vita.toolchain.cmake" CACHE PATH "toolchain file") 11 | else() 12 | message(FATAL_ERROR "Please define VITASDK to point to your SDK path!") 13 | endif() 14 | endif() 15 | 16 | ## Define project parameters here 17 | # Name of the project 18 | project(psvita-llm) 19 | # This line adds Vita helper macros, must go after project definition in order 20 | # to build Vita specific artifacts (self/vpk). 
21 | include("${VITASDK}/share/vita.cmake" REQUIRED) 22 | 23 | ## Configuration options for this app 24 | # Display name (under bubble in LiveArea) 25 | set(VITA_APP_NAME "PSVita LLM") 26 | # Unique ID must be exactly 9 characters. Recommended: XXXXYYYYY where X = 27 | # unique string of developer and Y = a unique number for this app 28 | set(VITA_TITLEID "LLMA00001") 29 | # Optional version string to show in LiveArea's more info screen 30 | set(VITA_VERSION "01.00") 31 | 32 | ## Flags and includes for building 33 | # Note that we make sure not to overwrite previous flags 34 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wno-unused-function -Wno-unused-variable -mcpu=cortex-a9 -mfloat-abi=hard -mfpu=neon -O3 -Ofast -fno-signed-zeros -fno-trapping-math -fassociative-math -freciprocal-math -fopenmp") 35 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") 36 | # Optional. You can specify more param.sfo flags this way. 37 | set(VITA_MKSFOEX_FLAGS "${VITA_MKSFOEX_FLAGS} -d PARENTAL_LEVEL=1") 38 | 39 | find_package(CURL REQUIRED) 40 | find_package(OpenSSL REQUIRED) 41 | 42 | # Add any additional include paths here 43 | include_directories( 44 | common 45 | ) 46 | 47 | # Add any additional library paths here 48 | # ${CMAKE_CURRENT_BINARY_DIR} lets you use any library currently being built 49 | link_directories( 50 | ${CMAKE_CURRENT_BINARY_DIR} 51 | ) 52 | 53 | ## Build and link 54 | # Add all the files needed to compile here 55 | add_executable(${PROJECT_NAME} 56 | vita_llm.c 57 | common/debugScreen.c 58 | ) 59 | 60 | # Library to link to (drop the -l prefix). This will mostly be stubs. 
61 | target_link_libraries(${PROJECT_NAME} 62 | m 63 | SceCtrl_stub 64 | SceSysmodule_stub 65 | SceNet_stub 66 | SceNetCtl_stub 67 | SceHttp_stub 68 | SceSsl_stub 69 | SceDisplay_stub # This used by debugScreenPrintf() 70 | ${CURL_LIBRARIES} 71 | ${OPENSSL_LIBRARIES} 72 | ) 73 | 74 | ## Create Vita files 75 | vita_create_self(${PROJECT_NAME}.self ${PROJECT_NAME}) 76 | # The FILE directive lets you add additional files to the VPK, the syntax is 77 | # FILE src_path dst_path_in_vpk. In this case, we add the LiveArea paths. 78 | vita_create_vpk(${PROJECT_NAME}.vpk ${VITA_TITLEID} ${PROJECT_NAME}.self 79 | VERSION ${VITA_VERSION} 80 | NAME ${VITA_APP_NAME} 81 | FILE sce_sys/icon0.png sce_sys/icon0.png 82 | FILE sce_sys/livearea/contents/bg.png sce_sys/livearea/contents/bg.png 83 | FILE sce_sys/livearea/contents/startup.png sce_sys/livearea/contents/startup.png 84 | FILE sce_sys/livearea/contents/template.xml sce_sys/livearea/contents/template.xml 85 | ) 86 | -------------------------------------------------------------------------------- /fetch_models.py: -------------------------------------------------------------------------------- 1 | import urllib.request 2 | import os 3 | import sys 4 | 5 | # stuff to grab (this script was from when I didn't have a way to download the models in-app so I made this script to download them to transfer to my vita through ftp) 6 | 7 | MODELS = { 8 | "260K": [ 9 | { 10 | "name": "stories260K.bin", 11 | "url": "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/stories260K.bin" 12 | }, 13 | { 14 | "name": "tok512.bin", 15 | "url": "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/tok512.bin" 16 | } 17 | ], 18 | "15M": [ 19 | { 20 | "name": "stories15M.bin", 21 | "url": "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.bin" 22 | }, 23 | { 24 | "name": "tokenizer.bin", 25 | "url": "https://raw.githubusercontent.com/karpathy/llama2.c/master/tokenizer.bin" 26 | } 27 | ] 28 | } 29 | 30 | 
def _reporthook(count, block_size, total_size): 31 | """A simple reporthook for urllib.request.urlretrieve.""" 32 | percent = int(count * block_size * 100 / total_size) 33 | sys.stdout.write(f"\r Downloading... {percent}%") 34 | sys.stdout.flush() 35 | 36 | def download_file(name, url): 37 | """Downloads a file from a URL to the current directory.""" 38 | if os.path.exists(name): 39 | print(f"-> {name} already exists. Skipping.") 40 | return 41 | print(f"-> Downloading {name}...") 42 | try: 43 | urllib.request.urlretrieve(url, name, _reporthook) 44 | sys.stdout.write("\n") # Move to the next line after download completes 45 | print(f" Done.") 46 | except Exception as e: 47 | print(f"\n Error downloading {name}: {e}") 48 | print(" Please check your internet connection and the URL.") 49 | 50 | def download_model_suite(key): 51 | """Downloads all files for a given model suite.""" 52 | if key in MODELS: 53 | print(f"\nFetching {key} model suite...") 54 | for file_info in MODELS[key]: 55 | download_file(file_info["name"], file_info["url"]) 56 | else: 57 | print(f"Error: Model suite '{key}' not found.") 58 | 59 | def display_final_instructions(): 60 | """Prints the final instructions for the user.""" 61 | print("\n" + "="*50) 62 | print(" DOWNLOAD COMPLETE!") 63 | print("="*50) 64 | print("\nThe necessary files have been downloaded to the current directory:") 65 | print(f" {os.getcwd()}") 66 | print("\nNext steps:") 67 | print("1. Connect your PS Vita to another device via USB or FTP using VitaShell.") 68 | print("2. Navigate to the `ux0:` partition on your Vita.") 69 | print("3. 
Copy the downloaded .bin files into the `ux0:data/` folder.") 70 | print("\nOnce the files are in place, you can run PSVita-LLM!") 71 | print("="*50 + "\n") 72 | 73 | 74 | def main(): 75 | """Main function to run the interactive downloader.""" 76 | print("="*50) 77 | print("Model Downloader") 78 | print("="*50) 79 | print("This script will download the required model and tokenizer") 80 | print("files for the PSVita-LLM application.\n") 81 | 82 | while True: 83 | print("Please choose an option:") 84 | print(" [1] Download the 260K model (~1 MB)") 85 | print(" [2] Download the 15M model (~60 MB)") 86 | print(" [3] Download BOTH models") 87 | print(" [4] Exit") 88 | 89 | choice = input("\nEnter your choice (1-4): ") 90 | 91 | if choice == '1': 92 | download_model_suite("260K") 93 | display_final_instructions() 94 | break 95 | elif choice == '2': 96 | download_model_suite("15M") 97 | display_final_instructions() 98 | break 99 | elif choice == '3': 100 | download_model_suite("260K") 101 | download_model_suite("15M") 102 | display_final_instructions() 103 | break 104 | elif choice == '4': 105 | print("Exiting.") 106 | break 107 | else: 108 | print("\nInvalid choice. Please enter a number between 1 and 4.\n") 109 | 110 | if __name__ == "__main__": 111 | main() -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.31 3 | 4 | # Default target executed when no arguments are given to make. 5 | default_target: all 6 | .PHONY : default_target 7 | 8 | # Allow only one "make -f Makefile2" at a time, but pass parallelism. 9 | .NOTPARALLEL: 10 | 11 | #============================================================================= 12 | # Special targets provided by cmake. 13 | 14 | # Disable implicit rules so canonical targets will work. 
15 | .SUFFIXES: 16 | 17 | # Disable VCS-based implicit rules. 18 | % : %,v 19 | 20 | # Disable VCS-based implicit rules. 21 | % : RCS/% 22 | 23 | # Disable VCS-based implicit rules. 24 | % : RCS/%,v 25 | 26 | # Disable VCS-based implicit rules. 27 | % : SCCS/s.% 28 | 29 | # Disable VCS-based implicit rules. 30 | % : s.% 31 | 32 | .SUFFIXES: .hpux_make_needs_suffix_list 33 | 34 | # Command-line flag to silence nested $(MAKE). 35 | $(VERBOSE)MAKESILENT = -s 36 | 37 | #Suppress display of executed commands. 38 | $(VERBOSE).SILENT: 39 | 40 | # A target that is always out of date. 41 | cmake_force: 42 | .PHONY : cmake_force 43 | 44 | #============================================================================= 45 | # Set environment variables for the build. 46 | 47 | # The shell in which to execute make rules. 48 | SHELL = /bin/sh 49 | 50 | # The CMake executable. 51 | CMAKE_COMMAND = /opt/homebrew/bin/cmake 52 | 53 | # The command to remove a file. 54 | RM = /opt/homebrew/bin/cmake -E rm -f 55 | 56 | # Escaping for special characters. 57 | EQUALS = = 58 | 59 | # The top-level source directory on which CMake was run. 60 | CMAKE_SOURCE_DIR = /Users/alex/Documents/Code/psvita-llm 61 | 62 | # The top-level build directory on which CMake was run. 63 | CMAKE_BINARY_DIR = /Users/alex/Documents/Code/psvita-llm 64 | 65 | #============================================================================= 66 | # Targets provided globally by CMake. 67 | 68 | # Special rule for the target edit_cache 69 | edit_cache: 70 | @$(CMAKE_COMMAND) -E cmake_echo_color "--switch=$(COLOR)" --cyan "Running CMake cache editor..." 
71 | /opt/homebrew/bin/ccmake -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) 72 | .PHONY : edit_cache 73 | 74 | # Special rule for the target edit_cache 75 | edit_cache/fast: edit_cache 76 | .PHONY : edit_cache/fast 77 | 78 | # Special rule for the target rebuild_cache 79 | rebuild_cache: 80 | @$(CMAKE_COMMAND) -E cmake_echo_color "--switch=$(COLOR)" --cyan "Running CMake to regenerate build system..." 81 | /opt/homebrew/bin/cmake --regenerate-during-build -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) 82 | .PHONY : rebuild_cache 83 | 84 | # Special rule for the target rebuild_cache 85 | rebuild_cache/fast: rebuild_cache 86 | .PHONY : rebuild_cache/fast 87 | 88 | # The main all target 89 | all: cmake_check_build_system 90 | $(CMAKE_COMMAND) -E cmake_progress_start /Users/alex/Documents/Code/psvita-llm/CMakeFiles /Users/alex/Documents/Code/psvita-llm//CMakeFiles/progress.marks 91 | $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 all 92 | $(CMAKE_COMMAND) -E cmake_progress_start /Users/alex/Documents/Code/psvita-llm/CMakeFiles 0 93 | .PHONY : all 94 | 95 | # The main clean target 96 | clean: 97 | $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 clean 98 | .PHONY : clean 99 | 100 | # The main clean target 101 | clean/fast: clean 102 | .PHONY : clean/fast 103 | 104 | # Prepare targets for installation. 105 | preinstall: all 106 | $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 preinstall 107 | .PHONY : preinstall 108 | 109 | # Prepare targets for installation. 110 | preinstall/fast: 111 | $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 preinstall 112 | .PHONY : preinstall/fast 113 | 114 | # clear depends 115 | depend: 116 | $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 1 117 | .PHONY : depend 118 | 119 | #============================================================================= 120 | # Target rules for targets named psvita-llm 121 | 122 | # Build rule for target. 
123 | psvita-llm: cmake_check_build_system 124 | $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 psvita-llm 125 | .PHONY : psvita-llm 126 | 127 | # fast build rule for target. 128 | psvita-llm/fast: 129 | $(MAKE) $(MAKESILENT) -f CMakeFiles/psvita-llm.dir/build.make CMakeFiles/psvita-llm.dir/build 130 | .PHONY : psvita-llm/fast 131 | 132 | #============================================================================= 133 | # Target rules for targets named psvita-llm-velf 134 | 135 | # Build rule for target. 136 | psvita-llm-velf: cmake_check_build_system 137 | $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 psvita-llm-velf 138 | .PHONY : psvita-llm-velf 139 | 140 | # fast build rule for target. 141 | psvita-llm-velf/fast: 142 | $(MAKE) $(MAKESILENT) -f CMakeFiles/psvita-llm-velf.dir/build.make CMakeFiles/psvita-llm-velf.dir/build 143 | .PHONY : psvita-llm-velf/fast 144 | 145 | #============================================================================= 146 | # Target rules for targets named psvita-llm.self-self 147 | 148 | # Build rule for target. 149 | psvita-llm.self-self: cmake_check_build_system 150 | $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 psvita-llm.self-self 151 | .PHONY : psvita-llm.self-self 152 | 153 | # fast build rule for target. 154 | psvita-llm.self-self/fast: 155 | $(MAKE) $(MAKESILENT) -f CMakeFiles/psvita-llm.self-self.dir/build.make CMakeFiles/psvita-llm.self-self.dir/build 156 | .PHONY : psvita-llm.self-self/fast 157 | 158 | #============================================================================= 159 | # Target rules for targets named psvita-llm.vpk-vpk 160 | 161 | # Build rule for target. 162 | psvita-llm.vpk-vpk: cmake_check_build_system 163 | $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 psvita-llm.vpk-vpk 164 | .PHONY : psvita-llm.vpk-vpk 165 | 166 | # fast build rule for target. 
167 | psvita-llm.vpk-vpk/fast: 168 | $(MAKE) $(MAKESILENT) -f CMakeFiles/psvita-llm.vpk-vpk.dir/build.make CMakeFiles/psvita-llm.vpk-vpk.dir/build 169 | .PHONY : psvita-llm.vpk-vpk/fast 170 | 171 | common/debugScreen.obj: common/debugScreen.c.obj 172 | .PHONY : common/debugScreen.obj 173 | 174 | # target to build an object file 175 | common/debugScreen.c.obj: 176 | $(MAKE) $(MAKESILENT) -f CMakeFiles/psvita-llm.dir/build.make CMakeFiles/psvita-llm.dir/common/debugScreen.c.obj 177 | .PHONY : common/debugScreen.c.obj 178 | 179 | common/debugScreen.i: common/debugScreen.c.i 180 | .PHONY : common/debugScreen.i 181 | 182 | # target to preprocess a source file 183 | common/debugScreen.c.i: 184 | $(MAKE) $(MAKESILENT) -f CMakeFiles/psvita-llm.dir/build.make CMakeFiles/psvita-llm.dir/common/debugScreen.c.i 185 | .PHONY : common/debugScreen.c.i 186 | 187 | common/debugScreen.s: common/debugScreen.c.s 188 | .PHONY : common/debugScreen.s 189 | 190 | # target to generate assembly for a file 191 | common/debugScreen.c.s: 192 | $(MAKE) $(MAKESILENT) -f CMakeFiles/psvita-llm.dir/build.make CMakeFiles/psvita-llm.dir/common/debugScreen.c.s 193 | .PHONY : common/debugScreen.c.s 194 | 195 | vita_llm.obj: vita_llm.c.obj 196 | .PHONY : vita_llm.obj 197 | 198 | # target to build an object file 199 | vita_llm.c.obj: 200 | $(MAKE) $(MAKESILENT) -f CMakeFiles/psvita-llm.dir/build.make CMakeFiles/psvita-llm.dir/vita_llm.c.obj 201 | .PHONY : vita_llm.c.obj 202 | 203 | vita_llm.i: vita_llm.c.i 204 | .PHONY : vita_llm.i 205 | 206 | # target to preprocess a source file 207 | vita_llm.c.i: 208 | $(MAKE) $(MAKESILENT) -f CMakeFiles/psvita-llm.dir/build.make CMakeFiles/psvita-llm.dir/vita_llm.c.i 209 | .PHONY : vita_llm.c.i 210 | 211 | vita_llm.s: vita_llm.c.s 212 | .PHONY : vita_llm.s 213 | 214 | # target to generate assembly for a file 215 | vita_llm.c.s: 216 | $(MAKE) $(MAKESILENT) -f CMakeFiles/psvita-llm.dir/build.make CMakeFiles/psvita-llm.dir/vita_llm.c.s 217 | .PHONY : vita_llm.c.s 218 | 
219 | # Help Target 220 | help: 221 | @echo "The following are some of the valid targets for this Makefile:" 222 | @echo "... all (the default if no target is provided)" 223 | @echo "... clean" 224 | @echo "... depend" 225 | @echo "... edit_cache" 226 | @echo "... rebuild_cache" 227 | @echo "... psvita-llm-velf" 228 | @echo "... psvita-llm.self-self" 229 | @echo "... psvita-llm.vpk-vpk" 230 | @echo "... psvita-llm" 231 | @echo "... common/debugScreen.obj" 232 | @echo "... common/debugScreen.i" 233 | @echo "... common/debugScreen.s" 234 | @echo "... vita_llm.obj" 235 | @echo "... vita_llm.i" 236 | @echo "... vita_llm.s" 237 | .PHONY : help 238 | 239 | 240 | 241 | #============================================================================= 242 | # Special targets to cleanup operation of make. 243 | 244 | # Special rule to run CMake to check the build system integrity. 245 | # No rule that depends on this can have commands that come from listfiles 246 | # because they might be regenerated. 247 | cmake_check_build_system: 248 | $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0 249 | .PHONY : cmake_check_build_system 250 | 251 | -------------------------------------------------------------------------------- /common/debugScreenFont.c: -------------------------------------------------------------------------------- 1 | /* 2 | * PSP Software Development Kit - http://www.pspdev.org 3 | * ----------------------------------------------------------------------- 4 | * Licensed under the BSD license, see LICENSE in PSPSDK root for details. 5 | * 6 | * font.c - Debug Font. 7 | * 8 | * Copyright (c) 2005 Marcus R. 
Brown 9 | * Copyright (c) 2005 James Forshaw 10 | * Copyright (c) 2005 John Kelley 11 | * 12 | * $Id: font.c 540 2005-07-08 19:35:10Z warren $ 13 | */ 14 | 15 | PsvDebugScreenFont psvDebugScreenFont = { glyphs:(unsigned char*) 16 | "\x00\x00\x00\x00\x00\x00\x00\x00\x3c\x42\xa5\x81\xa5\x99\x42\x3c" 17 | "\x3c\x7e\xdb\xff\xff\xdb\x66\x3c\x6c\xfe\xfe\xfe\x7c\x38\x10\x00" 18 | "\x10\x38\x7c\xfe\x7c\x38\x10\x00\x10\x38\x54\xfe\x54\x10\x38\x00" 19 | "\x10\x38\x7c\xfe\xfe\x10\x38\x00\x00\x00\x00\x30\x30\x00\x00\x00" 20 | "\xff\xff\xff\xe7\xe7\xff\xff\xff\x38\x44\x82\x82\x82\x44\x38\x00" 21 | "\xc7\xbb\x7d\x7d\x7d\xbb\xc7\xff\x0f\x03\x05\x79\x88\x88\x88\x70" 22 | "\x38\x44\x44\x44\x38\x10\x7c\x10\x30\x28\x24\x24\x28\x20\xe0\xc0" 23 | "\x3c\x24\x3c\x24\x24\xe4\xdc\x18\x10\x54\x38\xee\x38\x54\x10\x00" 24 | "\x10\x10\x10\x7c\x10\x10\x10\x10\x10\x10\x10\xff\x00\x00\x00\x00" 25 | "\x00\x00\x00\xff\x10\x10\x10\x10\x10\x10\x10\xf0\x10\x10\x10\x10" 26 | "\x10\x10\x10\x1f\x10\x10\x10\x10\x10\x10\x10\xff\x10\x10\x10\x10" 27 | "\x10\x10\x10\x10\x10\x10\x10\x10\x00\x00\x00\xff\x00\x00\x00\x00" 28 | "\x00\x00\x00\x1f\x10\x10\x10\x10\x00\x00\x00\xf0\x10\x10\x10\x10" 29 | "\x10\x10\x10\x1f\x00\x00\x00\x00\x10\x10\x10\xf0\x00\x00\x00\x00" 30 | "\x81\x42\x24\x18\x18\x24\x42\x81\x01\x02\x04\x08\x10\x20\x40\x80" 31 | "\x80\x40\x20\x10\x08\x04\x02\x01\x00\x10\x10\xff\x10\x10\x00\x00" 32 | "\x00\x00\x00\x00\x00\x00\x00\x00\x20\x20\x20\x20\x00\x00\x20\x00" 33 | "\x50\x50\x50\x00\x00\x00\x00\x00\x50\x50\xf8\x50\xf8\x50\x50\x00" 34 | "\x20\x78\xa0\x70\x28\xf0\x20\x00\xc0\xc8\x10\x20\x40\x98\x18\x00" 35 | "\x40\xa0\x40\xa8\x90\x98\x60\x00\x10\x20\x40\x00\x00\x00\x00\x00" 36 | "\x10\x20\x40\x40\x40\x20\x10\x00\x40\x20\x10\x10\x10\x20\x40\x00" 37 | "\x20\xa8\x70\x20\x70\xa8\x20\x00\x00\x20\x20\xf8\x20\x20\x00\x00" 38 | "\x00\x00\x00\x00\x00\x20\x20\x40\x00\x00\x00\x78\x00\x00\x00\x00" 39 | "\x00\x00\x00\x00\x00\x60\x60\x00\x00\x00\x08\x10\x20\x40\x80\x00" 40 | 
"\x70\x88\x98\xa8\xc8\x88\x70\x00\x20\x60\xa0\x20\x20\x20\xf8\x00" 41 | "\x70\x88\x08\x10\x60\x80\xf8\x00\x70\x88\x08\x30\x08\x88\x70\x00" 42 | "\x10\x30\x50\x90\xf8\x10\x10\x00\xf8\x80\xe0\x10\x08\x10\xe0\x00" 43 | "\x30\x40\x80\xf0\x88\x88\x70\x00\xf8\x88\x10\x20\x20\x20\x20\x00" 44 | "\x70\x88\x88\x70\x88\x88\x70\x00\x70\x88\x88\x78\x08\x10\x60\x00" 45 | "\x00\x00\x20\x00\x00\x20\x00\x00\x00\x00\x20\x00\x00\x20\x20\x40" 46 | "\x18\x30\x60\xc0\x60\x30\x18\x00\x00\x00\xf8\x00\xf8\x00\x00\x00" 47 | "\xc0\x60\x30\x18\x30\x60\xc0\x00\x70\x88\x08\x10\x20\x00\x20\x00" 48 | "\x70\x88\x08\x68\xa8\xa8\x70\x00\x20\x50\x88\x88\xf8\x88\x88\x00" 49 | "\xf0\x48\x48\x70\x48\x48\xf0\x00\x30\x48\x80\x80\x80\x48\x30\x00" 50 | "\xe0\x50\x48\x48\x48\x50\xe0\x00\xf8\x80\x80\xf0\x80\x80\xf8\x00" 51 | "\xf8\x80\x80\xf0\x80\x80\x80\x00\x70\x88\x80\xb8\x88\x88\x70\x00" 52 | "\x88\x88\x88\xf8\x88\x88\x88\x00\x70\x20\x20\x20\x20\x20\x70\x00" 53 | "\x38\x10\x10\x10\x90\x90\x60\x00\x88\x90\xa0\xc0\xa0\x90\x88\x00" 54 | "\x80\x80\x80\x80\x80\x80\xf8\x00\x88\xd8\xa8\xa8\x88\x88\x88\x00" 55 | "\x88\xc8\xc8\xa8\x98\x98\x88\x00\x70\x88\x88\x88\x88\x88\x70\x00" 56 | "\xf0\x88\x88\xf0\x80\x80\x80\x00\x70\x88\x88\x88\xa8\x90\x68\x00" 57 | "\xf0\x88\x88\xf0\xa0\x90\x88\x00\x70\x88\x80\x70\x08\x88\x70\x00" 58 | "\xf8\x20\x20\x20\x20\x20\x20\x00\x88\x88\x88\x88\x88\x88\x70\x00" 59 | "\x88\x88\x88\x88\x50\x50\x20\x00\x88\x88\x88\xa8\xa8\xd8\x88\x00" 60 | "\x88\x88\x50\x20\x50\x88\x88\x00\x88\x88\x88\x70\x20\x20\x20\x00" 61 | "\xf8\x08\x10\x20\x40\x80\xf8\x00\x70\x40\x40\x40\x40\x40\x70\x00" 62 | "\x00\x00\x80\x40\x20\x10\x08\x00\x70\x10\x10\x10\x10\x10\x70\x00" 63 | "\x20\x50\x88\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf8\x00" 64 | "\x40\x20\x10\x00\x00\x00\x00\x00\x00\x00\x70\x08\x78\x88\x78\x00" 65 | "\x80\x80\xb0\xc8\x88\xc8\xb0\x00\x00\x00\x70\x88\x80\x88\x70\x00" 66 | "\x08\x08\x68\x98\x88\x98\x68\x00\x00\x00\x70\x88\xf8\x80\x70\x00" 67 | 
"\x10\x28\x20\xf8\x20\x20\x20\x00\x00\x00\x68\x98\x98\x68\x08\x70" 68 | "\x80\x80\xf0\x88\x88\x88\x88\x00\x20\x00\x60\x20\x20\x20\x70\x00" 69 | "\x10\x00\x30\x10\x10\x10\x90\x60\x40\x40\x48\x50\x60\x50\x48\x00" 70 | "\x60\x20\x20\x20\x20\x20\x70\x00\x00\x00\xd0\xa8\xa8\xa8\xa8\x00" 71 | "\x00\x00\xb0\xc8\x88\x88\x88\x00\x00\x00\x70\x88\x88\x88\x70\x00" 72 | "\x00\x00\xb0\xc8\xc8\xb0\x80\x80\x00\x00\x68\x98\x98\x68\x08\x08" 73 | "\x00\x00\xb0\xc8\x80\x80\x80\x00\x00\x00\x78\x80\xf0\x08\xf0\x00" 74 | "\x40\x40\xf0\x40\x40\x48\x30\x00\x00\x00\x90\x90\x90\x90\x68\x00" 75 | "\x00\x00\x88\x88\x88\x50\x20\x00\x00\x00\x88\xa8\xa8\xa8\x50\x00" 76 | "\x00\x00\x88\x50\x20\x50\x88\x00\x00\x00\x88\x88\x98\x68\x08\x70" 77 | "\x00\x00\xf8\x10\x20\x40\xf8\x00\x18\x20\x20\x40\x20\x20\x18\x00" 78 | "\x20\x20\x20\x00\x20\x20\x20\x00\xc0\x20\x20\x10\x20\x20\xc0\x00" 79 | "\x40\xa8\x10\x00\x00\x00\x00\x00\x00\x00\x20\x50\xf8\x00\x00\x00" 80 | "\x70\x88\x80\x80\x88\x70\x20\x60\x90\x00\x00\x90\x90\x90\x68\x00" 81 | "\x10\x20\x70\x88\xf8\x80\x70\x00\x20\x50\x70\x08\x78\x88\x78\x00" 82 | "\x48\x00\x70\x08\x78\x88\x78\x00\x20\x10\x70\x08\x78\x88\x78\x00" 83 | "\x20\x00\x70\x08\x78\x88\x78\x00\x00\x70\x80\x80\x80\x70\x10\x60" 84 | "\x20\x50\x70\x88\xf8\x80\x70\x00\x50\x00\x70\x88\xf8\x80\x70\x00" 85 | "\x20\x10\x70\x88\xf8\x80\x70\x00\x50\x00\x00\x60\x20\x20\x70\x00" 86 | "\x20\x50\x00\x60\x20\x20\x70\x00\x40\x20\x00\x60\x20\x20\x70\x00" 87 | "\x50\x00\x20\x50\x88\xf8\x88\x00\x20\x00\x20\x50\x88\xf8\x88\x00" 88 | "\x10\x20\xf8\x80\xf0\x80\xf8\x00\x00\x00\x6c\x12\x7e\x90\x6e\x00" 89 | "\x3e\x50\x90\x9c\xf0\x90\x9e\x00\x60\x90\x00\x60\x90\x90\x60\x00" 90 | "\x90\x00\x00\x60\x90\x90\x60\x00\x40\x20\x00\x60\x90\x90\x60\x00" 91 | "\x40\xa0\x00\xa0\xa0\xa0\x50\x00\x40\x20\x00\xa0\xa0\xa0\x50\x00" 92 | "\x90\x00\x90\x90\xb0\x50\x10\xe0\x50\x00\x70\x88\x88\x88\x70\x00" 93 | "\x50\x00\x88\x88\x88\x88\x70\x00\x20\x20\x78\x80\x80\x78\x20\x20" 94 | 
"\x18\x24\x20\xf8\x20\xe2\x5c\x00\x88\x50\x20\xf8\x20\xf8\x20\x00" 95 | "\xc0\xa0\xa0\xc8\x9c\x88\x88\x8c\x18\x20\x20\xf8\x20\x20\x20\x40" 96 | "\x10\x20\x70\x08\x78\x88\x78\x00\x10\x20\x00\x60\x20\x20\x70\x00" 97 | "\x20\x40\x00\x60\x90\x90\x60\x00\x20\x40\x00\x90\x90\x90\x68\x00" 98 | "\x50\xa0\x00\xa0\xd0\x90\x90\x00\x28\x50\x00\xc8\xa8\x98\x88\x00" 99 | "\x00\x70\x08\x78\x88\x78\x00\xf8\x00\x60\x90\x90\x90\x60\x00\xf0" 100 | "\x20\x00\x20\x40\x80\x88\x70\x00\x00\x00\x00\xf8\x80\x80\x00\x00" 101 | "\x00\x00\x00\xf8\x08\x08\x00\x00\x84\x88\x90\xa8\x54\x84\x08\x1c" 102 | "\x84\x88\x90\xa8\x58\xa8\x3c\x08\x20\x00\x00\x20\x20\x20\x20\x00" 103 | "\x00\x00\x24\x48\x90\x48\x24\x00\x00\x00\x90\x48\x24\x48\x90\x00" 104 | "\x28\x50\x20\x50\x88\xf8\x88\x00\x28\x50\x70\x08\x78\x88\x78\x00" 105 | "\x28\x50\x00\x70\x20\x20\x70\x00\x28\x50\x00\x20\x20\x20\x70\x00" 106 | "\x28\x50\x00\x70\x88\x88\x70\x00\x50\xa0\x00\x60\x90\x90\x60\x00" 107 | "\x28\x50\x00\x88\x88\x88\x70\x00\x50\xa0\x00\xa0\xa0\xa0\x50\x00" 108 | "\xfc\x48\x48\x48\xe8\x08\x50\x20\x00\x50\x00\x50\x50\x50\x10\x20" 109 | "\xc0\x44\xc8\x54\xec\x54\x9e\x04\x10\xa8\x40\x00\x00\x00\x00\x00" 110 | "\x00\x20\x50\x88\x50\x20\x00\x00\x88\x10\x20\x40\x80\x28\x00\x00" 111 | "\x7c\xa8\xa8\x68\x28\x28\x28\x00\x38\x40\x30\x48\x48\x30\x08\x70" 112 | "\x00\x00\x00\x00\x00\x00\xff\xff\xf0\xf0\xf0\xf0\x0f\x0f\x0f\x0f" 113 | "\x00\x00\xff\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00" 114 | "\x00\x00\x00\x3c\x3c\x00\x00\x00\xff\xff\xff\xff\xff\xff\x00\x00" 115 | "\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\x0f\x0f\x0f\x0f\xf0\xf0\xf0\xf0" 116 | "\xfc\xfc\xfc\xfc\xfc\xfc\xfc\xfc\x03\x03\x03\x03\x03\x03\x03\x03" 117 | "\x3f\x3f\x3f\x3f\x3f\x3f\x3f\x3f\x11\x22\x44\x88\x11\x22\x44\x88" 118 | "\x88\x44\x22\x11\x88\x44\x22\x11\xfe\x7c\x38\x10\x00\x00\x00\x00" 119 | "\x00\x00\x00\x00\x10\x38\x7c\xfe\x80\xc0\xe0\xf0\xe0\xc0\x80\x00" 120 | "\x01\x03\x07\x0f\x07\x03\x01\x00\xff\x7e\x3c\x18\x18\x3c\x7e\xff" 121 | 
"\x81\xc3\xe7\xff\xff\xe7\xc3\x81\xf0\xf0\xf0\xf0\x00\x00\x00\x00" 122 | "\x00\x00\x00\x00\x0f\x0f\x0f\x0f\x0f\x0f\x0f\x0f\x00\x00\x00\x00" 123 | "\x00\x00\x00\x00\xf0\xf0\xf0\xf0\x33\x33\xcc\xcc\x33\x33\xcc\xcc" 124 | "\x00\x20\x20\x50\x50\x88\xf8\x00\x20\x20\x70\x20\x70\x20\x20\x00" 125 | "\x00\x00\x00\x50\x88\xa8\x50\x00\xff\xff\xff\xff\xff\xff\xff\xff" 126 | "\x00\x00\x00\x00\xff\xff\xff\xff\xf0\xf0\xf0\xf0\xf0\xf0\xf0\xf0" 127 | "\x0f\x0f\x0f\x0f\x0f\x0f\x0f\x0f\xff\xff\xff\xff\x00\x00\x00\x00" 128 | "\x00\x00\x68\x90\x90\x90\x68\x00\x30\x48\x48\x70\x48\x48\x70\xc0" 129 | "\xf8\x88\x80\x80\x80\x80\x80\x00\xf8\x50\x50\x50\x50\x50\x98\x00" 130 | "\xf8\x88\x40\x20\x40\x88\xf8\x00\x00\x00\x78\x90\x90\x90\x60\x00" 131 | "\x00\x50\x50\x50\x50\x68\x80\x80\x00\x50\xa0\x20\x20\x20\x20\x00" 132 | "\xf8\x20\x70\xa8\xa8\x70\x20\xf8\x20\x50\x88\xf8\x88\x50\x20\x00" 133 | "\x70\x88\x88\x88\x50\x50\xd8\x00\x30\x40\x40\x20\x50\x50\x50\x20" 134 | "\x00\x00\x00\x50\xa8\xa8\x50\x00\x08\x70\xa8\xa8\xa8\x70\x80\x00" 135 | "\x38\x40\x80\xf8\x80\x40\x38\x00\x70\x88\x88\x88\x88\x88\x88\x00" 136 | "\x00\xf8\x00\xf8\x00\xf8\x00\x00\x20\x20\xf8\x20\x20\x00\xf8\x00" 137 | "\xc0\x30\x08\x30\xc0\x00\xf8\x00\x18\x60\x80\x60\x18\x00\xf8\x00" 138 | "\x10\x28\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\xa0\x40" 139 | "\x00\x20\x00\xf8\x00\x20\x00\x00\x00\x50\xa0\x00\x50\xa0\x00\x00" 140 | "\x00\x18\x24\x24\x18\x00\x00\x00\x00\x30\x78\x78\x30\x00\x00\x00" 141 | "\x00\x00\x00\x00\x30\x00\x00\x00\x3e\x20\x20\x20\xa0\x60\x20\x00" 142 | "\xa0\x50\x50\x50\x00\x00\x00\x00\x40\xa0\x20\x40\xe0\x00\x00\x00" 143 | "\x00\x38\x38\x38\x38\x38\x38\x00\x00\x00\x00\x00\x00\x00\x00\x00", 144 | width :8, height:8, first:0, last:255, size_w:8, size_h:8}; -------------------------------------------------------------------------------- /common/debugScreen.c: -------------------------------------------------------------------------------- 1 | #ifndef DEBUG_SCREEN_C 2 | #define DEBUG_SCREEN_C 3 | 4 | /* 5 | * 
debugScreen.c of Vita SDK 6 | * 7 | * - psvDebugScreenInit() 8 | * Initializes debug screen for output. 9 | * 10 | * - psvDebugScreenPuts() 11 | * Similar to the C library function puts(), writes a string to the debug 12 | * screen up to but not including the NUL character. 13 | * Supports the most important CSI sequences of ECMA-48 / ISO/IEC 6429:1992. 14 | * Graphic Rendition Combination Mode (GRCM) supported is Cumulative. 15 | * Modifications: 16 | * - CSI SGR codes 30-37/38/39 & 40-47/48/49 set standard/fitting/default intensity, so instead of "\e[1;31m" use "\e[31;1m" 17 | * - ANSI color #8 is made darker (40<>80), so that "dark" white is still lighter than "bright" dark 18 | * - support 16 save storages for CSI s and CSI u, e.g. "\e[8s" and "\e[8u" 19 | * [1] https://en.wikipedia.org/wiki/ANSI_escape_code#CSI_sequences 20 | * [2] https://jonasjacek.github.io/colors/ 21 | * [3] https://www.ecma-international.org/publications/standards/Ecma-048.htm 22 | * [4] https://invisible-island.net/xterm/ctlseqs/ctlseqs.html 23 | * [5] http://man7.org/linux/man-pages/man4/console_codes.4.html 24 | * 25 | * (CSI = "\e[") 26 | * CSI [n] s = Save Cursor Position to slot #n (0-15). Default 0. 27 | * CSI [n] u = Restore Cursor Position from slot #n (0-15). Default 0. 28 | * CSI n A = Cursor Up <n> times. 29 | * CSI n B = Cursor Down <n> times. 30 | * CSI n C = Cursor Forward <n> times. 31 | * CSI n D = Cursor Back <n> times. 32 | * CSI n E = Cursor Next Line <n> times and to Beginning of that Line. 33 | * CSI n F = Cursor Previous Line <n> times and to Beginning of that Line. 34 | * CSI n G = Cursor to Column <n>. The value is 1-based and defaults to 1 (first column) if omitted. 35 | * CSI n ; m H = Cursor to Row <n> and Column <m>. The values are 1-based and default to 1 (top left corner) if omitted. 36 | * CSI n ; m f = Cursor to Row <n> and Column <m>. The values are 1-based and default to 1 (top left corner) if omitted. 37 | * CSI [n] J = Clears part of the screen. Cursor position does not change. 
38 | * 0 (default) from cursor to end of screen. 39 | * 1 from cursor to beginning of the screen. 40 | * 2 entire screen 41 | * CSI [n] K = Clears part of the line. Cursor position does not change. 42 | * 0 (default) from cursor to end of line. 43 | * 1 from cursor to beginning of line. 44 | * 2 clear entire line. 45 | * CSI [n] m = Sets the appearance of the following characters. 46 | * 0 Reset all (colors and inversion) (default) 47 | * 1 Increased intensity ("bright" color) 48 | * 2 Decreased intensity ("faint"/"dark" color) 49 | * 7 Enable inversion 50 | * 22 Standard intensity ("normal" color) 51 | * 27 Disable inversion 52 | * 30–37 Set ANSI foreground color with standard intensity 53 | * 38 Set foreground color. Arguments are 5;<n> or 2;<r>;<g>;<b> 54 | * 39 Default foreground color 55 | * 40–47 Set standard ANSI background color with standard intensity 56 | * 48 Set background color. Arguments are 5;<n> or 2;<r>;<g>;<b> 57 | * 49 Default background color 58 | * 90–97 Set ANSI foreground color with increased intensity 59 | * 100–107 Set ANSI background color with increased intensity 60 | * 61 | * - psvDebugScreenPrintf() 62 | * Similar to the C library function printf(), formats a string and outputs 63 | * it via psvDebugScreenPuts() to the debug screen. 64 | * 65 | * - psvDebugScreenGetColorStateCopy(ColorState *copy) 66 | * Get copy of current color state. 67 | * 68 | * - psvDebugScreenGetCoordsXY(int *x, int *y) 69 | * Get copy of current pixel coordinates. 70 | * Allows for multiple and custom position stores. 71 | * Allows correct positioning when using different font sizes. 72 | * 73 | * - psvDebugScreenSetCoordsXY(int *x, int *y) 74 | * Set pixel coordinates. 75 | * Allows for multiple and custom position stores. 76 | * Allows correct positioning when using different font sizes. 77 | * 78 | * - PsvDebugScreenFont *psvDebugScreenGetFont() 79 | * Get current font. 80 | * 81 | * - PsvDebugScreenFont *psvDebugScreenSetFont(PsvDebugScreenFont *font) 82 | * Set font. 
Returns current font. 83 | * 84 | * - PsvDebugScreenFont *psvDebugScreenScaleFont2x(PsvDebugScreenFont *source_font) 85 | * Scales a font by 2 (e.g. 8x8 to 16x16) and returns new scaled font. 86 | * 87 | * Also see the following samples: 88 | * - debugscreen 89 | * - debug_print 90 | * 91 | */ 92 | 93 | #include <stdlib.h> // for malloc(), free() 94 | #include <stdio.h> // for vsnprintf() 95 | #include <string.h> // for memset(), memcpy() 96 | #include <stdarg.h> // for va_list, va_start(), va_end() 97 | #include <inttypes.h> 98 | 99 | #include "debugScreen.h" 100 | 101 | #include "debugScreenFont.c" 102 | 103 | #define SCREEN_FB_WIDTH (960) // frame buffer aligned width for accessing vram 104 | #define SCREEN_FB_SIZE (2 * 1024 * 1024) // Must be 256KB aligned 105 | #ifndef SCREEN_TAB_SIZE // this allows easy overriding 106 | #define SCREEN_TAB_SIZE (8) 107 | #endif 108 | #define SCREEN_TAB_W ((F)->size_w * (SCREEN_TAB_SIZE)) 109 | #define F psvDebugScreenFontCurrent 110 | 111 | #define FROM_FULL_RGB(r,g,b ) ( ((b)<<16) | ((g)<<8) | (r) ) 112 | #define CONVERT_RGB_BGR(rgb) rgb = ( (((rgb)&0x0000FF)<<16) | ((rgb)&0x00FF00) | (((rgb)&0xFF0000)>>16) ) 113 | 114 | #define CLEARSCRNBLOCK(H,toH,W,toW,color) for (int h = (H); h < (toH); h++) for (int w = (W); w < (toW); w++) ((uint32_t*)base)[h*(SCREEN_FB_WIDTH) + w] = (color); 115 | #define CLEARSCRNLINES(H,toH,color) { uint32_t *pixel = (uint32_t *)base + ((H) * (SCREEN_FB_WIDTH)); int i = (((toH) - (H)) * (SCREEN_FB_WIDTH)); for (; i > 0; i--) *pixel++ = (color); } 116 | 117 | #define SAVE_STORAGES 16 118 | 119 | static int initialized = 0; 120 | static int mutex, coordX, coordY; 121 | static int savedX[SAVE_STORAGES] = { 0 }, savedY[SAVE_STORAGES] = { 0 }; 122 | static ColorState colors = { 123 | 0, 0, // truecolor flags 124 | 0, 0, // truecolors 125 | 0, 0, 0, 0, 0, // ANSI/VTERM/GREYSCALE colors 126 | 7, 22, 0, 22, 0, // default colors (ANSI/VTERM/GREYSCALE) 127 | 0, 0 // current colors 128 | }; 129 | 130 | static PsvDebugScreenFont *psvDebugScreenFontCurrent = 
&psvDebugScreenFont; 131 | 132 | #ifdef __vita__ 133 | #include 134 | #include 135 | #include 136 | static SceUID displayblock; 137 | static void* base; // pointer to frame buffer 138 | #else 139 | #define NO_psvDebugScreenInit 140 | #ifndef psvDebugScreenInitReplacement 141 | #define psvDebugScreenInitReplacement(...) 142 | #endif 143 | #define sceKernelLockMutex(m,v,x) m=v 144 | #define sceKernelUnlockMutex(m,v) m=v 145 | static char base[(SCREEN_FB_WIDTH) * (SCREEN_HEIGHT) * 4]; 146 | #endif 147 | 148 | static uint32_t DARK_COLORS_BGR[8] = { 149 | 0x000000, 0x000040, 0x004000, 0x004040, 0x400000, 0x400040, 0x404000, 0x808080, // 0-7 150 | }; 151 | 152 | // ANSI/VTERM/GREYSCALE palette: https://en.wikipedia.org/wiki/ANSI_escape_code#8-bit 153 | // modifications: 154 | // - #8 is made darker (40<>80), so that "dark" white is still lighter than "bright" dark 155 | static uint32_t ANSI_COLORS_BGR[256] = { 156 | 0x000000, 0x000080, 0x008000, 0x008080, 0x800000, 0x800080, 0x808000, 0xc0c0c0, // 0-7 157 | 0x404040, 0x0000ff, 0x00ff00, 0x00ffff, 0xff0000, 0xff00ff, 0xffff00, 0xffffff, // 8-15 158 | 0x000000, 0x5f0000, 0x870000, 0xaf0000, 0xd70000, 0xff0000, 0x005f00, 0x5f5f00, // 16-23 159 | 0x875f00, 0xaf5f00, 0xd75f00, 0xff5f00, 0x008700, 0x5f8700, 0x878700, 0xaf8700, // 24-31 160 | 0xd78700, 0xff8700, 0x00af00, 0x5faf00, 0x87af00, 0xafaf00, 0xd7af00, 0xffaf00, // 32-39 161 | 0x00d700, 0x5fd700, 0x87d700, 0xafd700, 0xd7d700, 0xffd700, 0x00ff00, 0x5fff00, // 40-47 162 | 0x87ff00, 0xafff00, 0xd7ff00, 0xffff00, 0x00005f, 0x5f005f, 0x87005f, 0xaf005f, // 48-55 163 | 0xd7005f, 0xff005f, 0x005f5f, 0x5f5f5f, 0x875f5f, 0xaf5f5f, 0xd75f5f, 0xff5f5f, // 56-63 164 | 0x00875f, 0x5f875f, 0x87875f, 0xaf875f, 0xd7875f, 0xff875f, 0x00af5f, 0x5faf5f, // 64-71 165 | 0x87af5f, 0xafaf5f, 0xd7af5f, 0xffaf5f, 0x00d75f, 0x5fd75f, 0x87d75f, 0xafd75f, // 72-79 166 | 0xd7d75f, 0xffd75f, 0x00ff5f, 0x5fff5f, 0x87ff5f, 0xafff5f, 0xd7ff5f, 0xffff5f, // 80-87 167 | 0x000087, 0x5f0087, 0x870087, 
0xaf0087, 0xd70087, 0xff0087, 0x005f87, 0x5f5f87, // 88-95 168 | 0x875f87, 0xaf5f87, 0xd75f87, 0xff5f87, 0x008787, 0x5f8787, 0x878787, 0xaf8787, // 96-103 169 | 0xd78787, 0xff8787, 0x00af87, 0x5faf87, 0x87af87, 0xafaf87, 0xd7af87, 0xffaf87, // 104-111 170 | 0x00d787, 0x5fd787, 0x87d787, 0xafd787, 0xd7d787, 0xffd787, 0x00ff87, 0x5fff87, // 112-119 171 | 0x87ff87, 0xafff87, 0xd7ff87, 0xffff87, 0x0000af, 0x5f00af, 0x8700af, 0xaf00af, // 120-127 172 | 0xd700af, 0xff00af, 0x005faf, 0x5f5faf, 0x875faf, 0xaf5faf, 0xd75faf, 0xff5faf, // 128-135 173 | 0x0087af, 0x5f87af, 0x8787af, 0xaf87af, 0xd787af, 0xff87af, 0x00afaf, 0x5fafaf, // 136-143 174 | 0x87afaf, 0xafafaf, 0xd7afaf, 0xffafaf, 0x00d7af, 0x5fd7af, 0x87d7af, 0xafd7af, // 144-151 175 | 0xd7d7af, 0xffd7af, 0x00ffaf, 0x5fffaf, 0x87ffaf, 0xafffaf, 0xd7ffaf, 0xffffaf, // 152-159 176 | 0x0000d7, 0x5f00d7, 0x8700d7, 0xaf00d7, 0xd700d7, 0xff00d7, 0x005fd7, 0x5f5fd7, // 160-167 177 | 0x875fd7, 0xaf5fd7, 0xd75fd7, 0xff5fd7, 0x0087d7, 0x5f87d7, 0x8787d7, 0xaf87d7, // 168-175 178 | 0xd787d7, 0xff87d7, 0x00afd7, 0x5fafd7, 0x87afd7, 0xafafd7, 0xd7afd7, 0xffafd7, // 176-183 179 | 0x00d7d7, 0x5fd7d7, 0x87d7d7, 0xafd7d7, 0xd7d7d7, 0xffd7d7, 0x00ffd7, 0x5fffd7, // 184-191 180 | 0x87ffd7, 0xafffd7, 0xd7ffd7, 0xffffd7, 0x0000ff, 0x5f00ff, 0x8700ff, 0xaf00ff, // 192-199 181 | 0xd700ff, 0xff00ff, 0x005fff, 0x5f5fff, 0x875fff, 0xaf5fff, 0xd75fff, 0xff5fff, // 200-207 182 | 0x0087ff, 0x5f87ff, 0x8787ff, 0xaf87ff, 0xd787ff, 0xff87ff, 0x00afff, 0x5fafff, // 208-215 183 | 0x87afff, 0xafafff, 0xd7afff, 0xffafff, 0x00d7ff, 0x5fd7ff, 0x87d7ff, 0xafd7ff, // 216-223 184 | 0xd7d7ff, 0xffd7ff, 0x00ffff, 0x5fffff, 0x87ffff, 0xafffff, 0xd7ffff, 0xffffff, // 224-231 185 | 0x080808, 0x121212, 0x1c1c1c, 0x262626, 0x303030, 0x3a3a3a, 0x444444, 0x4e4e4e, // 232-239 186 | 0x585858, 0x626262, 0x6c6c6c, 0x767676, 0x808080, 0x8a8a8a, 0x949494, 0x9e9e9e, // 240-247 187 | 0xa8a8a8, 0xb2b2b2, 0xbcbcbc, 0xc6c6c6, 0xd0d0d0, 0xdadada, 0xe4e4e4, 0xeeeeee, // 248-255 
188 | }; 189 | 190 | /* 191 | * Reset foreground color to default 192 | */ 193 | static void psvDebugScreenResetFgColor(void) { 194 | colors.fgTrueColorFlag = 0; 195 | colors.fgTrueColor = 0; 196 | colors.fgIndex = colors.fgIndexDefault; 197 | colors.fgIntensity = colors.fgIntensityDefault; 198 | } 199 | 200 | /* 201 | * Reset background color to default 202 | */ 203 | static void psvDebugScreenResetBgColor(void) { 204 | colors.bgTrueColorFlag = 0; 205 | colors.bgTrueColor = 0; 206 | colors.bgIndex = colors.bgIndexDefault; 207 | colors.bgIntensity = colors.bgIntensityDefault; 208 | } 209 | 210 | /* 211 | * Reset inversion state to default 212 | */ 213 | static void psvDebugScreenResetInversion(void) { 214 | colors.inversion = colors.inversionDefault; 215 | } 216 | 217 | /* 218 | * Determine colors according to current color state 219 | */ 220 | static void psvDebugScreenSetColors(void) { 221 | uint32_t *color_fg, *color_bg; 222 | 223 | // special case: inversion 224 | if (!colors.inversion) { 225 | color_fg = &colors.color_fg; 226 | color_bg = &colors.color_bg; 227 | } else { 228 | color_fg = &colors.color_bg; 229 | color_bg = &colors.color_fg; 230 | } 231 | 232 | // foregound color 233 | if ((colors.fgIndex<=7) && (colors.fgIntensity==1)) { // ANSI palette with increased intensity 234 | colors.fgIndex |= 0x8; 235 | } else if ((colors.fgIndex<=15) && (colors.fgIntensity!=1)) { // ANSI palette with standard/decreased intensity 236 | colors.fgIndex &= 0x7; 237 | } 238 | if (colors.fgTrueColorFlag) { 239 | *color_fg = colors.fgTrueColor; 240 | } else { 241 | if ((colors.fgIndex<=7) && (colors.fgIntensity==2)) { // "ANSI" palette with decreased intensity 242 | *color_fg = DARK_COLORS_BGR[colors.fgIndex]; 243 | } else { // ANSI/VTERM/GREYSCALE palette 244 | *color_fg = ANSI_COLORS_BGR[colors.fgIndex]; 245 | } 246 | } 247 | *color_fg |= 0xFF000000; // opaque 248 | 249 | // backgound color 250 | if ((colors.bgIndex<=7) && (colors.bgIntensity==1)) { // ANSI palette with 
increased intensity 251 | colors.bgIndex |= 0x8; 252 | } else if ((colors.bgIndex<=15) && (colors.bgIntensity!=1)) { // ANSI palette with standard/decreased intensity 253 | colors.bgIndex &= 0x7; 254 | } 255 | if (colors.bgTrueColorFlag) { 256 | *color_bg = colors.bgTrueColor; 257 | } else { 258 | if ((colors.bgIndex<=7) && (colors.bgIntensity==2)) { // "ANSI" palette with decreased intensity 259 | *color_bg = DARK_COLORS_BGR[colors.bgIndex]; 260 | } else { // ANSI/VTERM/GREYSCALE palette 261 | *color_bg = ANSI_COLORS_BGR[colors.bgIndex]; 262 | } 263 | } 264 | *color_bg |= 0xFF000000; // opaque 265 | } 266 | 267 | /* 268 | * Parse CSI sequences 269 | */ 270 | static size_t psvDebugScreenEscape(const unsigned char *str) { 271 | unsigned int i, argc, arg[32] = { 0 }; 272 | unsigned int c; 273 | uint32_t unit, mode; 274 | int *colorTrueColorFlag; 275 | uint32_t *colorTrueColor; 276 | unsigned char *colorIndex, *colorIntensity; 277 | for (i = 0, argc = 0; (argc < (sizeof(arg)/sizeof(*arg))) && (str[i] != '\0'); i++) { 278 | switch (str[i]) { 279 | // numeric char 280 | case '0': 281 | case '1': 282 | case '2': 283 | case '3': 284 | case '4': 285 | case '5': 286 | case '6': 287 | case '7': 288 | case '8': 289 | case '9': 290 | arg[argc] = (arg[argc] * 10) + (str[i] - '0'); 291 | continue; 292 | // argument separator 293 | case ';': argc++; continue; 294 | // CSI commands 295 | // save/restore position 296 | case 's': 297 | if (arg[0]<SAVE_STORAGES) { savedX[arg[0]] = coordX; savedY[arg[0]] = coordY; } 298 | return i; 299 | case 'u': 300 | if (arg[0]<SAVE_STORAGES) { coordX = savedX[arg[0]]; coordY = savedY[arg[0]]; } 301 | return i; 302 | // cursor movement 303 | case 'A': coordY -= arg[0] * (F)->size_h; return i; 304 | case 'B': coordY += arg[0] * (F)->size_h; return i; 305 | case 'C': coordX += arg[0] * (F)->size_w; return i; 306 | case 'D': coordX -= arg[0] * (F)->size_w; return i; 307 | // cursor movement to beginning of next/previous line(s) 308 | case 'E': coordY += arg[0] * (F)->size_h; coordX = 0; return i; 309 | case 'F': coordY -= arg[0] * (F)->size_h; coordX = 0; return i; 310 | // cursor positioning 311 | case 'G': coordX = (arg[0]-1) * (F)->size_w; return i; 312 | case 'H': 313 | case 'f': 314 | coordY = (arg[0]-1) * 
(F)->size_h; 315 | coordX = (arg[1]-1) * (F)->size_w; 316 | return i; 317 | // clear part of "J"=screen or "K"=Line, so J code re-uses part of K 318 | case 'J': 319 | case 'K': 320 | if (arg[0]==0) { // from cursor to end of line/screen 321 | CLEARSCRNBLOCK(coordY, coordY + (F)->size_h, coordX, (SCREEN_WIDTH), colors.color_bg); // line 322 | if (str[i]=='J') CLEARSCRNLINES(coordY + (F)->size_h, (SCREEN_HEIGHT), colors.color_bg); // screen 323 | } else if (arg[0]==1) { // from beginning of line/screen to cursor 324 | CLEARSCRNBLOCK(coordY, coordY + (F)->size_h, 0, coordX, colors.color_bg); // line 325 | if (str[i]=='J') CLEARSCRNLINES(0, coordY, colors.color_bg); // screen 326 | } else if (arg[0]==2) { // whole line/screen 327 | if (str[i]=='K') CLEARSCRNLINES(coordY, coordY + (F)->size_h, colors.color_bg) // line 328 | else if (str[i]=='J') CLEARSCRNLINES(0, (SCREEN_HEIGHT), colors.color_bg); // screen 329 | } 330 | return i; 331 | // color 332 | case 'm': 333 | for (c = 0; c <= argc; c++) { 334 | switch (arg[c]) { 335 | // reset all 336 | case 0: 337 | psvDebugScreenResetFgColor(); 338 | psvDebugScreenResetBgColor(); 339 | psvDebugScreenResetInversion(); 340 | continue; 341 | break; 342 | // intensity 343 | case 1: // increased = "bright" color 344 | case 2: // decreased = "dark" color 345 | case 22: // standard = "normal" color 346 | colors.fgIntensity = arg[c]; 347 | continue; 348 | break; 349 | // inversion 350 | case 7: // enable 351 | colors.inversion = 1; 352 | continue; 353 | break; 354 | case 27: // disable 355 | colors.inversion = 0; 356 | continue; 357 | break; 358 | // set from color map or truecolor 359 | case 38: // foreground color 360 | case 48: // background color 361 | mode = arg[c] / 10; 362 | colorTrueColorFlag = mode&1 ? &colors.fgTrueColorFlag : &colors.bgTrueColorFlag; 363 | if (arg[c+1]==5) { // 8-bit: [0-15][16-231][232-255] color map 364 | *colorTrueColorFlag = 0; 365 | colorIndex = mode&1 ? 
&colors.fgIndex : &colors.bgIndex; 366 | *colorIndex = arg[c+2] & 0xFF; 367 | colorIntensity = mode&1 ? &colors.fgIntensity : &colors.bgIntensity; 368 | *colorIntensity = ((*colorIndex>=8) && (*colorIndex<=15)) ? 1 : 22; 369 | c+=2; // extra arguments 370 | } else if (arg[c+1]==2) { // 24-bit color space 371 | *colorTrueColorFlag = 1; 372 | colorTrueColor = mode&1 ? &colors.fgTrueColor : &colors.bgTrueColor; 373 | *colorTrueColor = FROM_FULL_RGB(arg[c+2], arg[c+3], arg[c+4]); 374 | c+=4; // extra arguments 375 | } 376 | continue; 377 | break; 378 | // default color 379 | case 39: // foreground color 380 | psvDebugScreenResetFgColor(); 381 | continue; 382 | break; 383 | case 49: // background color 384 | psvDebugScreenResetBgColor(); 385 | continue; 386 | break; 387 | // custom color reset 388 | default: 389 | // ANSI colors (30-37, 40-47, 90-97, 100-107) 390 | mode = arg[c] / 10; 391 | if ((mode!=3) && (mode!=4) && (mode!=9) && (mode!=10)) continue; // skip unsupported modes 392 | unit = arg[c] % 10; 393 | if (unit>7) continue; // skip unsupported modes 394 | colorTrueColorFlag = mode&1 ? &colors.fgTrueColorFlag : &colors.bgTrueColorFlag; 395 | *colorTrueColorFlag = 0; 396 | colorIndex = mode&1 ? &colors.fgIndex : &colors.bgIndex; 397 | *colorIndex = unit; 398 | colorIntensity = mode&1 ? &colors.fgIntensity : &colors.bgIntensity; 399 | *colorIntensity = mode&8 ? 
1 : 22; 400 | break; 401 | } 402 | } 403 | psvDebugScreenSetColors(); 404 | return i; 405 | } 406 | } 407 | return 0; 408 | } 409 | 410 | /* 411 | * Initialize debug screen 412 | */ 413 | int psvDebugScreenInit() { 414 | psvDebugScreenResetFgColor(); 415 | psvDebugScreenResetBgColor(); 416 | psvDebugScreenResetInversion(); 417 | psvDebugScreenSetColors(); 418 | 419 | #ifdef NO_psvDebugScreenInit 420 | psvDebugScreenInitReplacement(); 421 | initialized = 1; 422 | return 0; // avoid linking non-initializer (prx) with sceDisplay/sceMemory 423 | #else 424 | mutex = sceKernelCreateMutex("log_mutex", 0, 0, NULL); 425 | displayblock = sceKernelAllocMemBlock("display", SCE_KERNEL_MEMBLOCK_TYPE_USER_CDRAM_RW, (SCREEN_FB_SIZE), NULL); 426 | if (displayblock < 0) 427 | return displayblock; 428 | sceKernelGetMemBlockBase(displayblock, (void**)&base); 429 | SceDisplayFrameBuf frame = { sizeof(frame), base, (SCREEN_FB_WIDTH), 0, (SCREEN_WIDTH), (SCREEN_HEIGHT) }; 430 | initialized = 1; 431 | return sceDisplaySetFrameBuf(&frame, SCE_DISPLAY_SETBUF_NEXTFRAME); 432 | #endif 433 | } 434 | 435 | /* 436 | * Finalize debug screen 437 | */ 438 | int psvDebugScreenFinish() { 439 | if (!initialized) 440 | return -1; 441 | 442 | initialized = 0; 443 | 444 | #ifdef NO_psvDebugScreenInit 445 | return 0; 446 | #else 447 | sceKernelDeleteMutex(mutex); 448 | sceDisplaySetFrameBuf(NULL, SCE_DISPLAY_SETBUF_IMMEDIATE); 449 | return sceKernelFreeMemBlock(displayblock); 450 | #endif 451 | } 452 | 453 | __attribute__((destructor)) static void psvDebugScreenDestructor() { 454 | psvDebugScreenFinish(); 455 | } 456 | 457 | /* 458 | * Draw text onto debug screen 459 | */ 460 | int psvDebugScreenPuts(const char * _text) { 461 | const unsigned char*text = (const unsigned char*)_text; 462 | int c; 463 | unsigned char t; 464 | unsigned char drawDummy; 465 | // 466 | uint32_t *vram; 467 | int bits_per_glyph = ((F)->width * (F)->height); 468 | int bitmap_offset; 469 | unsigned char *font; 470 | int row; 471 | 
int max_row; 472 | int col; 473 | unsigned char mask; 474 | uint32_t *pixel; 475 | 476 | sceKernelLockMutex(mutex, 1, NULL); 477 | for (c = 0; text[c] ; c++) { 478 | t = text[c]; 479 | // handle CSI sequence 480 | if ((t == '\e') && (text[c+1] == '[')) { 481 | c += psvDebugScreenEscape(text + c + 2) + 2; 482 | if (coordX < 0) coordX = 0; // CSI position are 1-based, 483 | if (coordY < 0) coordY = 0; // prevent 0-based coordinate from producing a negative X/Y 484 | continue; 485 | } 486 | // handle non-printable characters #1 (line-dependent codes) 487 | if (t == '\n') { 488 | coordX = 0; 489 | coordY += (F)->size_h; 490 | continue; 491 | } 492 | if (t == '\r') { 493 | coordX = 0; 494 | continue; 495 | } 496 | // check if glyph fits in line 497 | if ((coordX + (F)->width) > (SCREEN_WIDTH)) { 498 | coordY += (F)->size_h; 499 | coordX = 0; 500 | } 501 | // check if glyph fits in screen 502 | if ((coordY + (F)->height) > (SCREEN_HEIGHT)) { 503 | coordX = coordY = 0; 504 | } 505 | // handle non-printable characters #2 506 | if (t == '\t') { 507 | coordX += (SCREEN_TAB_W) - (coordX % (SCREEN_TAB_W)); 508 | continue; 509 | } 510 | 511 | // draw glyph or dummy glyph (dotted line in the middle) 512 | // works also with not byte-aligned glyphs 513 | vram = ((uint32_t*)base) + coordX + (coordY * (SCREEN_FB_WIDTH)); 514 | row = 0; 515 | // check if glyph is available in font 516 | if ((t > (F)->last) || (t < (F)->first)) { 517 | drawDummy = 1; 518 | bitmap_offset = 0; 519 | font = NULL; 520 | mask = 1 << 7; 521 | } else { 522 | drawDummy = 0; 523 | bitmap_offset = (t - (F)->first) * bits_per_glyph; 524 | font = &(F)->glyphs[ (bitmap_offset / 8) ]; 525 | mask = 1 << 7; 526 | for (col = (bitmap_offset % 8); col > 0; col--, mask >>= 1); 527 | } 528 | // special case: dummy glyph, clear to middle height 529 | max_row = 0; 530 | if (drawDummy) { 531 | max_row = (F)->height / 2; 532 | for (; row < max_row; row++, vram += (SCREEN_FB_WIDTH)) { 533 | pixel = vram; 534 | col = 0; 535 | 
for (; col < (F)->size_w ; col++) { 536 | *pixel++ = colors.color_bg; 537 | } 538 | } 539 | } 540 | // draw font glyph or dummy glyph 541 | if (drawDummy) { 542 | max_row++; 543 | if (max_row > (F)->height) max_row = (F)->height; 544 | } else { 545 | max_row = (F)->height; 546 | } 547 | for (; row < max_row; row++, vram += (SCREEN_FB_WIDTH)) { 548 | pixel = vram; 549 | col = 0; 550 | for (; col < (F)->width ; col++, mask >>= 1) { 551 | if (drawDummy) { 552 | *pixel++ = (col&1) ? colors.color_fg : colors.color_bg; 553 | } else { 554 | if (!mask) { font++; mask = 1 << 7; } // no more bits: we exhausted this byte 555 | *pixel++ = (*font&mask) ? colors.color_fg : colors.color_bg; 556 | } 557 | } 558 | // right margin 559 | for (; col < (F)->size_w ; col++) 560 | *pixel++ = colors.color_bg; 561 | } 562 | // draw bottom margin 563 | max_row = (F)->size_h; 564 | for (; row < (F)->size_h; row++, vram += (SCREEN_FB_WIDTH)) 565 | for (pixel = vram, col = 0; col < (F)->size_w ; col++) 566 | *pixel++ = colors.color_bg; 567 | // advance X position 568 | coordX += (F)->size_w; 569 | } 570 | sceKernelUnlockMutex(mutex, 1); 571 | return c; 572 | } 573 | 574 | 575 | /* 576 | * Printf text onto debug screen 577 | */ 578 | __attribute__((__format__ (__printf__, 1, 2))) 579 | int psvDebugScreenPrintf(const char *format, ...) 
{ 580 | char buf[4096]; 581 | 582 | va_list opt; 583 | va_start(opt, format); 584 | int ret = vsnprintf(buf, sizeof(buf), format, opt); 585 | psvDebugScreenPuts(buf); 586 | va_end(opt); 587 | 588 | return ret; 589 | } 590 | 591 | /* 592 | * Return copy of color state 593 | */ 594 | void psvDebugScreenGetColorStateCopy(ColorState *copy) { 595 | if (copy) { 596 | memcpy(copy, &colors, sizeof(ColorState)); 597 | CONVERT_RGB_BGR(copy->fgTrueColor); 598 | CONVERT_RGB_BGR(copy->bgTrueColor); 599 | CONVERT_RGB_BGR(copy->color_fg); 600 | CONVERT_RGB_BGR(copy->color_bg); 601 | } 602 | } 603 | 604 | /* 605 | * Return copy of pixel coordinates 606 | */ 607 | void psvDebugScreenGetCoordsXY(int *x, int *y) { 608 | if (x) *x = coordX; 609 | if (y) *y = coordY; 610 | } 611 | 612 | /* 613 | * Set pixel coordinates 614 | */ 615 | void psvDebugScreenSetCoordsXY(int *x, int *y) { 616 | if (x) { 617 | coordX = *x; 618 | if (coordX < 0) coordX = 0; 619 | } 620 | if (y) { 621 | coordY = *y; 622 | if (coordY < 0) coordY = 0; 623 | } 624 | } 625 | 626 | /* 627 | * Return pointer to current font 628 | */ 629 | PsvDebugScreenFont *psvDebugScreenGetFont(void) { 630 | return F; 631 | } 632 | 633 | /* 634 | * Set font 635 | */ 636 | PsvDebugScreenFont *psvDebugScreenSetFont(PsvDebugScreenFont *font) { 637 | if ((font) && (font->glyphs)) F = font; 638 | return F; 639 | } 640 | 641 | /* 642 | * Return scaled-by-2 copy of font 643 | */ 644 | PsvDebugScreenFont *psvDebugScreenScaleFont2x(PsvDebugScreenFont *source_font) { 645 | // works also with not byte-aligned glyphs 646 | PsvDebugScreenFont *target_font; 647 | size_t size; 648 | size_t align; 649 | int glyph; 650 | int row; 651 | int col; 652 | int count; 653 | unsigned char *source_bitmap; 654 | unsigned char source_mask; 655 | unsigned char *target_bitmap, *target_bitmap2; 656 | unsigned char target_mask, target_mask2; 657 | int target_next_row_bytes, target_next_row_bits; 658 | unsigned char pixel; 659 | 660 | if (!source_font) return NULL; 
661 | 662 | // allocate target structure and bitmap 663 | target_font = (PsvDebugScreenFont *)malloc(sizeof(PsvDebugScreenFont)); 664 | memset(target_font, 0, sizeof(PsvDebugScreenFont)); 665 | // copy and scale meta information 666 | target_font->width = 2 * source_font->width; 667 | target_font->height = 2 * source_font->height; 668 | target_font->first = source_font->first; 669 | target_font->last = source_font->last; 670 | target_font->size_w = 2 * source_font->size_w; 671 | target_font->size_h = 2 * source_font->size_h; 672 | 673 | // calculate size of target bitmap 674 | size = target_font->width * target_font->height * (target_font->last - target_font->first + 1); 675 | if (size <= 0) { 676 | free(target_font); 677 | return NULL; 678 | } 679 | align = size % 8; 680 | size /= 8; 681 | if (align) size++; 682 | 683 | // allocate and initialize target bitmap 684 | target_font->glyphs = (unsigned char *)malloc(size); 685 | memset(target_font->glyphs, 0, size); 686 | 687 | // scale source bitmap and store in target bitmap 688 | source_bitmap = source_font->glyphs; 689 | source_mask = 1 << 7; 690 | // 691 | target_bitmap = target_font->glyphs; 692 | target_mask = 1 << 7; 693 | target_next_row_bytes = target_font->width / 8; 694 | target_next_row_bits = target_font->width % 8; 695 | // 696 | for (glyph = source_font->first; glyph <= source_font->last; glyph++) { 697 | for (row = source_font->height; row > 0; row--) { 698 | // Find beginning of next target row 699 | target_bitmap2 = target_bitmap + target_next_row_bytes; // advance full bytes 700 | target_mask2 = target_mask; // advance remaining bits 701 | for (col = target_next_row_bits; col > 0; col--, target_mask2 >>= 1) { 702 | if (!target_mask2) { target_bitmap2++; target_mask2 = 1 << 7; } // no more bits: we advance to the next target byte 703 | } 704 | // Get pixel from source bitmap 705 | for (col = source_font->width; col > 0; col--, source_mask >>= 1) { 706 | if (!source_mask) { source_bitmap++; 
source_mask = 1 << 7; } // no more bits: we advance to the next source byte 707 | pixel = *source_bitmap & source_mask; 708 | // Put pixels into target bitmap 709 | for (count = 2; count > 0; count--) { 710 | // duplicate column in origial row 711 | if (!target_mask) { target_bitmap++; target_mask = 1 << 7; } // no more bits: we advance to the next target byte 712 | if (pixel) *target_bitmap |= target_mask; 713 | target_mask >>= 1; 714 | // duplicate column in duplicated row 715 | if (!target_mask2) { target_bitmap2++; target_mask2 = 1 << 7; } // no more bits: we advance to the next target byte 716 | if (pixel) *target_bitmap2 |= target_mask2; 717 | target_mask2 >>= 1; 718 | } 719 | } 720 | // Next target row is directly behind duplicated row 721 | target_bitmap = target_bitmap2; 722 | target_mask = target_mask2; 723 | } 724 | } 725 | 726 | return target_font; 727 | } 728 | 729 | #undef SCREEN_TAB_W 730 | #undef F 731 | 732 | #endif 733 | -------------------------------------------------------------------------------- /vita_llm.c: -------------------------------------------------------------------------------- 1 | /* Inference for Llama-2 Transformer model in pure C */ 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include "../common/debugScreen.h" 20 | #if defined _WIN32 21 | #include "win.h" 22 | #else 23 | 24 | #endif 25 | 26 | // colors 27 | #define COLOR_RESET "\x1b[0m" 28 | #define COLOR_RED "\x1b[31m" 29 | #define COLOR_GREEN "\x1b[32m" 30 | #define COLOR_YELLOW "\x1b[33m" 31 | #define COLOR_CYAN "\x1b[36m" 32 | 33 | // Fwd declaration 34 | void download_file(const char* url, const char* filepath); 35 | 36 | // ---------------------------------------------------------------------------- 37 | // Transformer model 38 | 39 | typedef struct { 40 | char* model_path; 41 | char* 
// Model hyperparameters, as stored in the header of a checkpoint file.
// read_checkpoint() fills this struct with a single fread() of sizeof(Config)
// bytes taken from the start of the file, so the order and type of the fields
// below must stay in step with the on-disk header.
typedef struct {
    int dim; // transformer dimension
    int hidden_dim; // for ffn layers
    int n_layers; // number of layers
    int n_heads; // number of query heads
    int n_kv_heads; // number of key/value heads (can be < query heads because of multiquery)
    // read_checkpoint() treats a non-positive value read from the file as
    // "classifier weights are not shared" and then stores abs() of it here
    int vocab_size; // vocabulary size, usually 256 (byte-level)
    int seq_len; // max sequence length
} Config;
cache 92 | float* key_cache; // (layer, seq_len, dim) 93 | float* value_cache; // (layer, seq_len, dim) 94 | } RunState; 95 | 96 | typedef struct { 97 | Config config; // the hyperparameters of the architecture (the blueprint) 98 | TransformerWeights weights; // the weights of the model 99 | RunState state; // buffers for the "wave" of activations in the forward pass 100 | // some more state needed to properly clean up the memory mapping (sigh) 101 | int fd; // file descriptor for memory mapping 102 | float* data; // memory mapped data pointer 103 | ssize_t file_size; // size of the checkpoint file in bytes 104 | } Transformer; 105 | 106 | void malloc_run_state(RunState* s, Config* p) { 107 | // we calloc instead of malloc to keep valgrind happy 108 | int kv_dim = (p->dim * p->n_kv_heads) / p->n_heads; 109 | s->x = calloc(p->dim, sizeof(float)); 110 | s->xb = calloc(p->dim, sizeof(float)); 111 | s->xb2 = calloc(p->dim, sizeof(float)); 112 | s->hb = calloc(p->hidden_dim, sizeof(float)); 113 | s->hb2 = calloc(p->hidden_dim, sizeof(float)); 114 | s->q = calloc(p->dim, sizeof(float)); 115 | s->key_cache = calloc(p->n_layers * p->seq_len * kv_dim, sizeof(float)); 116 | s->value_cache = calloc(p->n_layers * p->seq_len * kv_dim, sizeof(float)); 117 | s->att = calloc(p->n_heads * p->seq_len, sizeof(float)); 118 | s->logits = calloc(p->vocab_size, sizeof(float)); 119 | // ensure all mallocs went fine 120 | if (!s->x || !s->xb || !s->xb2 || !s->hb || !s->hb2 || !s->q 121 | || !s->key_cache || !s->value_cache || !s->att || !s->logits) { 122 | psvDebugScreenPrintf("malloc failed!\n"); 123 | sceKernelExitProcess(0); 124 | } 125 | } 126 | 127 | void free_run_state(RunState* s) { 128 | free(s->x); 129 | free(s->xb); 130 | free(s->xb2); 131 | free(s->hb); 132 | free(s->hb2); 133 | free(s->q); 134 | free(s->att); 135 | free(s->logits); 136 | free(s->key_cache); 137 | free(s->value_cache); 138 | } 139 | 140 | void memory_map_weights(TransformerWeights *w, Config* p, float* ptr, int 
shared_weights) { 141 | int head_size = p->dim / p->n_heads; 142 | // make sure the multiplications below are done in 64bit to fit the parameter counts of 13B+ models 143 | unsigned long long n_layers = p->n_layers; 144 | w->token_embedding_table = ptr; 145 | ptr += p->vocab_size * p->dim; 146 | w->rms_att_weight = ptr; 147 | ptr += n_layers * p->dim; 148 | w->wq = ptr; 149 | ptr += n_layers * p->dim * (p->n_heads * head_size); 150 | w->wk = ptr; 151 | ptr += n_layers * p->dim * (p->n_kv_heads * head_size); 152 | w->wv = ptr; 153 | ptr += n_layers * p->dim * (p->n_kv_heads * head_size); 154 | w->wo = ptr; 155 | ptr += n_layers * (p->n_heads * head_size) * p->dim; 156 | w->rms_ffn_weight = ptr; 157 | ptr += n_layers * p->dim; 158 | w->w1 = ptr; 159 | ptr += n_layers * p->dim * p->hidden_dim; 160 | w->w2 = ptr; 161 | ptr += n_layers * p->hidden_dim * p->dim; 162 | w->w3 = ptr; 163 | ptr += n_layers * p->dim * p->hidden_dim; 164 | w->rms_final_weight = ptr; 165 | ptr += p->dim; 166 | ptr += p->seq_len * head_size / 2; // skip what used to be freq_cis_real (for RoPE) 167 | ptr += p->seq_len * head_size / 2; // skip what used to be freq_cis_imag (for RoPE) 168 | w->wcls = shared_weights ? w->token_embedding_table : ptr; 169 | } 170 | 171 | void read_checkpoint(char* checkpoint, Config* config, TransformerWeights* weights, 172 | int* fd, float** data, ssize_t* file_size) { 173 | psvDebugScreenPrintf(COLOR_CYAN "Reading checkpoint: %s\n" COLOR_RESET, checkpoint); 174 | FILE *file = fopen(checkpoint, "rb"); 175 | if (!file) { psvDebugScreenPrintf(COLOR_RED "Error: Couldn't open file %s\n" COLOR_RESET, checkpoint); sceKernelExitProcess(0); } 176 | psvDebugScreenPrintf(COLOR_CYAN "File opened. Reading config...\n" COLOR_RESET); 177 | // read in the config header 178 | if (fread(config, sizeof(Config), 1, file) != 1) { sceKernelExitProcess(0); } 179 | // negative vocab size is hacky way of signaling unshared weights. bit yikes. 
180 | int shared_weights = config->vocab_size > 0 ? 1 : 0; 181 | config->vocab_size = abs(config->vocab_size); 182 | // figure out the file size 183 | fseek(file, 0, SEEK_END); // move file pointer to end of file 184 | *file_size = ftell(file); // get the file size, in bytes 185 | fseek(file, 0, SEEK_SET); // rewind to beginning of file 186 | psvDebugScreenPrintf("Model file size: %ld bytes.\n", *file_size); 187 | // allocate memory for the weights 188 | psvDebugScreenPrintf(COLOR_CYAN "Attempting to allocate memory for weights...\n" COLOR_RESET); 189 | sceKernelDelayThread(500 * 1000); 190 | *data = (float*)malloc(*file_size); 191 | if (!*data) { 192 | psvDebugScreenPrintf(COLOR_RED "FATAL: malloc failed for weights!\n" COLOR_RESET); 193 | while(1) { sceKernelDelayThread(1000 * 1000); } 194 | } 195 | psvDebugScreenPrintf(COLOR_GREEN "Memory allocated successfully.\n" COLOR_RESET); 196 | // read in the weights 197 | psvDebugScreenPrintf(COLOR_CYAN "Reading weights...\n" COLOR_RESET); 198 | if (fread(*data, 1, *file_size, file) != *file_size) { 199 | psvDebugScreenPrintf(COLOR_RED "Error: failed to read weights!\n" COLOR_RESET); 200 | sceKernelExitProcess(0); 201 | } 202 | fclose(file); 203 | psvDebugScreenPrintf(COLOR_CYAN "Weights read. 
Mapping weights...\n" COLOR_RESET); 204 | // memory map the Transformer weights into the data pointer 205 | float* weights_ptr = *data + sizeof(Config)/sizeof(float); 206 | memory_map_weights(weights, config, weights_ptr, shared_weights); 207 | psvDebugScreenPrintf(COLOR_GREEN "Checkpoint loaded successfully.\n" COLOR_RESET); 208 | } 209 | 210 | void build_transformer(Transformer *t, char* checkpoint_path) { 211 | // read in the Config and the Weights from the checkpoint 212 | read_checkpoint(checkpoint_path, &t->config, &t->weights, &t->fd, &t->data, &t->file_size); 213 | // allocate the RunState buffers 214 | malloc_run_state(&t->state, &t->config); 215 | } 216 | 217 | void free_transformer(Transformer* t) { 218 | // free the weights 219 | if (t->data) { free(t->data); } 220 | // free the RunState buffers 221 | free_run_state(&t->state); 222 | } 223 | 224 | // ---------------------------------------------------------------------------- 225 | // neural net blocks; the dynamics of the Transformer 226 | 227 | void rmsnorm(float* o, float* x, float* weight, int size) { 228 | // calculate sum of squares 229 | float ss = 0.0f; 230 | for (int j = 0; j < size; j++) { 231 | ss += x[j] * x[j]; 232 | } 233 | ss /= size; 234 | ss += 1e-5f; 235 | ss = 1.0f / sqrtf(ss); 236 | // normalize and scale 237 | for (int j = 0; j < size; j++) { 238 | o[j] = weight[j] * (ss * x[j]); 239 | } 240 | } 241 | 242 | void softmax(float* x, int size) { 243 | // find max value (for numerical stability) 244 | float max_val = x[0]; 245 | for (int i = 1; i < size; i++) { 246 | if (x[i] > max_val) { 247 | max_val = x[i]; 248 | } 249 | } 250 | // exp and sum 251 | float sum = 0.0f; 252 | for (int i = 0; i < size; i++) { 253 | x[i] = expf(x[i] - max_val); 254 | sum += x[i]; 255 | } 256 | // normalize 257 | for (int i = 0; i < size; i++) { 258 | x[i] /= sum; 259 | } 260 | } 261 | 262 | void matmul(float* xout, float* x, float* w, int n, int d) { 263 | // W (d,n) @ x (n,) -> xout (d,) 264 | // by far 
the most amount of time is spent inside this little function 265 | int i; 266 | // #pragma omp parallel for private(i) 267 | for (i = 0; i < d; i++) { 268 | float val = 0.0f; 269 | for (int j = 0; j < n; j++) { 270 | val += w[i * n + j] * x[j]; 271 | } 272 | xout[i] = val; 273 | } 274 | } 275 | 276 | float* forward(Transformer* transformer, int token, int pos) { 277 | 278 | // a few convenience variables 279 | Config* p = &transformer->config; 280 | TransformerWeights* w = &transformer->weights; 281 | RunState* s = &transformer->state; 282 | float *x = s->x; 283 | int dim = p->dim; 284 | int kv_dim = (p->dim * p->n_kv_heads) / p->n_heads; 285 | int kv_mul = p->n_heads / p->n_kv_heads; // integer multiplier of the kv sharing in multiquery 286 | int hidden_dim = p->hidden_dim; 287 | int head_size = dim / p->n_heads; 288 | 289 | // copy the token embedding into x 290 | float* content_row = w->token_embedding_table + token * dim; 291 | memcpy(x, content_row, dim*sizeof(*x)); 292 | 293 | // forward all the layers 294 | for(unsigned long long l = 0; l < p->n_layers; l++) { 295 | 296 | // attention rmsnorm 297 | rmsnorm(s->xb, x, w->rms_att_weight + l*dim, dim); 298 | 299 | // key and value point to the kv cache 300 | int loff = l * p->seq_len * kv_dim; // kv cache layer offset for convenience 301 | s->k = s->key_cache + loff + pos * kv_dim; 302 | s->v = s->value_cache + loff + pos * kv_dim; 303 | 304 | // qkv matmuls for this position 305 | matmul(s->q, s->xb, w->wq + l*dim*dim, dim, dim); 306 | matmul(s->k, s->xb, w->wk + l*dim*kv_dim, dim, kv_dim); 307 | matmul(s->v, s->xb, w->wv + l*dim*kv_dim, dim, kv_dim); 308 | 309 | // RoPE relative positional encoding: complex-valued rotate q and k in each head 310 | for (int i = 0; i < dim; i+=2) { 311 | int head_dim = i % head_size; 312 | float freq = 1.0f / powf(10000.0f, head_dim / (float)head_size); 313 | float val = pos * freq; 314 | float fcr = cosf(val); 315 | float fci = sinf(val); 316 | int rotn = i < kv_dim ? 
2 : 1; // how many vectors? 2 = q & k, 1 = q only 317 | for (int v = 0; v < rotn; v++) { 318 | float* vec = v == 0 ? s->q : s->k; // the vector to rotate (query or key) 319 | float v0 = vec[i]; 320 | float v1 = vec[i+1]; 321 | vec[i] = v0 * fcr - v1 * fci; 322 | vec[i+1] = v0 * fci + v1 * fcr; 323 | } 324 | } 325 | 326 | // multihead attention. iterate over all heads 327 | int h; 328 | // #pragma omp parallel for private(h) 329 | for (h = 0; h < p->n_heads; h++) { 330 | // get the query vector for this head 331 | float* q = s->q + h * head_size; 332 | // attention scores for this head 333 | float* att = s->att + h * p->seq_len; 334 | // iterate over all timesteps, including the current one 335 | for (int t = 0; t <= pos; t++) { 336 | // get the key vector for this head and at this timestep 337 | float* k = s->key_cache + loff + t * kv_dim + (h / kv_mul) * head_size; 338 | // calculate the attention score as the dot product of q and k 339 | float score = 0.0f; 340 | for (int i = 0; i < head_size; i++) { 341 | score += q[i] * k[i]; 342 | } 343 | score /= sqrtf(head_size); 344 | // save the score to the attention buffer 345 | att[t] = score; 346 | } 347 | 348 | // softmax the scores to get attention weights, from 0..pos inclusively 349 | softmax(att, pos + 1); 350 | 351 | // weighted sum of the values, store back into xb 352 | float* xb = s->xb + h * head_size; 353 | memset(xb, 0, head_size * sizeof(float)); 354 | for (int t = 0; t <= pos; t++) { 355 | // get the value vector for this head and at this timestep 356 | float* v = s->value_cache + loff + t * kv_dim + (h / kv_mul) * head_size; 357 | // get the attention weight for this timestep 358 | float a = att[t]; 359 | // accumulate the weighted value into xb 360 | for (int i = 0; i < head_size; i++) { 361 | xb[i] += a * v[i]; 362 | } 363 | } 364 | } 365 | 366 | // final matmul to get the output of the attention 367 | matmul(s->xb2, s->xb, w->wo + l*dim*dim, dim, dim); 368 | 369 | // residual connection back into x 
370 | for (int i = 0; i < dim; i++) { 371 | x[i] += s->xb2[i]; 372 | } 373 | 374 | // ffn rmsnorm 375 | rmsnorm(s->xb, x, w->rms_ffn_weight + l*dim, dim); 376 | 377 | // Now for FFN in PyTorch we have: self.w2(F.silu(self.w1(x)) * self.w3(x)) 378 | // first calculate self.w1(x) and self.w3(x) 379 | matmul(s->hb, s->xb, w->w1 + l*dim*hidden_dim, dim, hidden_dim); 380 | matmul(s->hb2, s->xb, w->w3 + l*dim*hidden_dim, dim, hidden_dim); 381 | 382 | // SwiGLU non-linearity 383 | for (int i = 0; i < hidden_dim; i++) { 384 | float val = s->hb[i]; 385 | // silu(x)=x*σ(x), where σ(x) is the logistic sigmoid 386 | val *= (1.0f / (1.0f + expf(-val))); 387 | // elementwise multiply with w3(x) 388 | val *= s->hb2[i]; 389 | s->hb[i] = val; 390 | } 391 | 392 | // final matmul to get the output of the ffn 393 | matmul(s->xb, s->hb, w->w2 + l*dim*hidden_dim, hidden_dim, dim); 394 | 395 | // residual connection 396 | for (int i = 0; i < dim; i++) { 397 | x[i] += s->xb[i]; 398 | } 399 | } 400 | 401 | // final rmsnorm 402 | rmsnorm(x, x, w->rms_final_weight, dim); 403 | 404 | // classifier into logits 405 | matmul(s->logits, x, w->wcls, p->dim, p->vocab_size); 406 | return s->logits; 407 | } 408 | 409 | // ---------------------------------------------------------------------------- 410 | // The Byte Pair Encoding (BPE) Tokenizer that translates strings <-> tokens 411 | 412 | typedef struct { 413 | char *str; 414 | int id; 415 | } TokenIndex; 416 | 417 | typedef struct { 418 | char** vocab; 419 | float* vocab_scores; 420 | TokenIndex *sorted_vocab; 421 | int vocab_size; 422 | unsigned int max_token_length; 423 | unsigned char byte_pieces[512]; // stores all single-byte strings 424 | } Tokenizer; 425 | 426 | int compare_tokens(const void *a, const void *b) { 427 | return strcmp(((TokenIndex*)a)->str, ((TokenIndex*)b)->str); 428 | } 429 | 430 | void build_tokenizer(Tokenizer* t, char* tokenizer_path, int vocab_size) { 431 | t->vocab_size = vocab_size; 432 | 
psvDebugScreenPrintf("Tokenizer vocab size: %d\n", vocab_size); 433 | // i should have written the vocab file as a simple text file, i forgive myself... 434 | t->vocab = (char**)malloc(vocab_size * sizeof(char*)); 435 | t->vocab_scores = (float*)malloc(vocab_size * sizeof(float)); 436 | t->sorted_vocab = NULL; // initialized lazily 437 | for (int i = 0; i < 256; i++) { 438 | t->byte_pieces[i * 2] = (unsigned char)i; 439 | t->byte_pieces[i * 2 + 1] = '\0'; 440 | } 441 | // read in the file 442 | FILE *file = fopen(tokenizer_path, "rb"); 443 | if (!file) { psvDebugScreenPrintf("couldn't load %s\n", tokenizer_path); sceKernelExitProcess(0); } 444 | if (fread(&t->max_token_length, sizeof(int), 1, file) != 1) { psvDebugScreenPrintf("failed read\n"); sceKernelExitProcess(0); } 445 | int len; 446 | for (int i = 0; i < vocab_size; i++) { 447 | if (fread(t->vocab_scores + i, sizeof(float), 1, file) != 1) { psvDebugScreenPrintf("failed read\n"); sceKernelExitProcess(0);} 448 | if (fread(&len, sizeof(int), 1, file) != 1) { psvDebugScreenPrintf("failed read\n"); sceKernelExitProcess(0); } 449 | t->vocab[i] = (char *)malloc(len + 1); 450 | if (fread(t->vocab[i], len, 1, file) != 1) { psvDebugScreenPrintf("failed read\n"); sceKernelExitProcess(0); } 451 | t->vocab[i][len] = '\0'; // add the string terminating token 452 | } 453 | fclose(file); 454 | } 455 | 456 | void free_tokenizer(Tokenizer* t) { 457 | for (int i = 0; i < t->vocab_size; i++) { free(t->vocab[i]); } 458 | free(t->vocab); 459 | free(t->vocab_scores); 460 | free(t->sorted_vocab); 461 | } 462 | 463 | char* decode(Tokenizer* t, int prev_token, int token) { 464 | char *piece = t->vocab[token]; 465 | // following BOS (1) token, sentencepiece decoder strips any leading whitespace (see PR #89) 466 | if (prev_token == 1 && piece[0] == ' ') { piece++; } 467 | // careful, some tokens designate raw bytes, and look like e.g. 
// Print a decoded token piece to the debug screen.
// A piece consisting of exactly one byte is only printed when that byte is
// printable or whitespace, because raw byte tokens may be control codes
// (backspace, etc.). NULL and empty pieces print nothing.
void safe_printf(char *piece) {
    if (piece == NULL || piece[0] == '\0') {
        return;
    }
    const int is_single_byte = (piece[1] == '\0');
    if (is_single_byte) {
        const unsigned char ch = (unsigned char)piece[0];
        if (!isprint(ch) && !isspace(ch)) {
            return; // bad byte, don't print it
        }
    }
    psvDebugScreenPrintf("%s", piece);
}
res->id : -1; 495 | } 496 | 497 | void encode(Tokenizer* t, char *text, int8_t bos, int8_t eos, int *tokens, int *n_tokens) { 498 | // encode the string text (input) into an upper-bound preallocated tokens[] array 499 | // bos != 0 means prepend the BOS token (=1), eos != 0 means append the EOS token (=2) 500 | if (text == NULL) { psvDebugScreenPrintf("cannot encode NULL text\n"); sceKernelExitProcess(0); } 501 | 502 | psvDebugScreenPrintf("Inside encode, vocab size: %d\n", t->vocab_size); 503 | if (t->sorted_vocab == NULL) { 504 | psvDebugScreenPrintf("Lazily sorting vocab...\n"); 505 | // lazily sort the vocabulary 506 | t->sorted_vocab = malloc(t->vocab_size * sizeof(TokenIndex)); 507 | for (int i = 0; i < t->vocab_size; i++) { 508 | t->sorted_vocab[i].str = t->vocab[i]; 509 | t->sorted_vocab[i].id = i; 510 | } 511 | qsort(t->sorted_vocab, t->vocab_size, sizeof(TokenIndex), compare_tokens); 512 | } 513 | 514 | // create a temporary buffer that will store merge candidates of always two consecutive tokens 515 | // *2 for concat, +1 for null terminator +2 for UTF8 (in case max_token_length is 1) 516 | char* str_buffer = malloc((t->max_token_length*2 +1 +2) * sizeof(char)); 517 | size_t str_len = 0; 518 | 519 | // start at 0 tokens 520 | *n_tokens = 0; 521 | 522 | // add optional BOS (=1) token, if desired 523 | if (bos) tokens[(*n_tokens)++] = 1; 524 | 525 | // add_dummy_prefix is true by default 526 | // so prepend a dummy prefix token to the input string, but only if text != "" 527 | // TODO: pretty sure this isn't correct in the general case but I don't have the 528 | // energy to read more of the sentencepiece code to figure out what it's doing 529 | if (text[0] != '\0') { 530 | int dummy_prefix = str_lookup(" ", t->sorted_vocab, t->vocab_size); 531 | tokens[(*n_tokens)++] = dummy_prefix; 532 | } 533 | 534 | // Okay UTF-8 time. This will get messy. 
Here is the reference from Wikipedia: 535 | // Code point ↔ UTF-8 conversion 536 | // First code point Last code point Byte 1 Byte 2 Byte 3 Byte 4 537 | // U+0000 U+007F 0xxxxxxx 538 | // U+0080 U+07FF 110xxxxx 10xxxxxx 539 | // U+0800 U+FFFF 1110xxxx 10xxxxxx 10xxxxxx 540 | // U+10000 U+10FFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 541 | 542 | // process the raw (UTF-8) byte sequence of the input string 543 | for (char *c = text; *c != '\0'; c++) { 544 | 545 | // reset buffer if the current byte is ASCII or a leading byte 546 | // 0xC0 is 11000000, so (*c & 0xC0) keeps the first 2 bits and zeros the rest 547 | // 0x80 is 10000000 548 | // in UTF-8, all continuation bytes start with "10" in first two bits 549 | // so in English this is: "if this byte is not a continuation byte" 550 | if ((*c & 0xC0) != 0x80) { 551 | // this byte must be either a leading byte (11...) or an ASCII char (0x...) 552 | // => reset our location, as we're starting a new UTF-8 codepoint 553 | str_len = 0; 554 | } 555 | 556 | // append the current byte to the buffer 557 | str_buffer[str_len++] = *c; // ++ is post-increment, incremented after this line 558 | str_buffer[str_len] = '\0'; 559 | 560 | // while the next character is a continuation byte, continue appending 561 | // but if there are too many of them, just stop to avoid overruning str_buffer size. 
562 | if ((*(c+1) & 0xC0) == 0x80 && str_len < 4) { 563 | continue; 564 | } 565 | 566 | // ok c+1 is not a continuation byte, so we've read in a full codepoint 567 | int id = str_lookup(str_buffer, t->sorted_vocab, t->vocab_size); 568 | 569 | if (id != -1) { 570 | // we found this codepoint in vocab, add it as a token 571 | tokens[(*n_tokens)++] = id; 572 | } else { 573 | // byte_fallback encoding: just encode each byte as a token 574 | // +3 is here because the first 3 vocab elements are , , 575 | // so the individual bytes only start at index 3 576 | for (int i=0; i < str_len; i++) { 577 | tokens[(*n_tokens)++] = (unsigned char)str_buffer[i] + 3; 578 | } 579 | } 580 | str_len = 0; // protect against a sequence of stray UTF8 continuation bytes 581 | } 582 | 583 | // merge the best consecutive pair each iteration, according the scores in vocab_scores 584 | while (1) { 585 | float best_score = -1e10; 586 | int best_id = -1; 587 | int best_idx = -1; 588 | 589 | for (int i=0; i < (*n_tokens-1); i++) { 590 | // check if we can merge the pair (tokens[i], tokens[i+1]) 591 | sprintf(str_buffer, "%s%s", t->vocab[tokens[i]], t->vocab[tokens[i+1]]); 592 | int id = str_lookup(str_buffer, t->sorted_vocab, t->vocab_size); 593 | if (id != -1 && t->vocab_scores[id] > best_score) { 594 | // this merge pair exists in vocab! 
record its score and position 595 | best_score = t->vocab_scores[id]; 596 | best_id = id; 597 | best_idx = i; 598 | } 599 | } 600 | 601 | if (best_idx == -1) { 602 | break; // we couldn't find any more pairs to merge, so we're done 603 | } 604 | 605 | // merge the consecutive pair (best_idx, best_idx+1) into new token best_id 606 | tokens[best_idx] = best_id; 607 | // delete token at position best_idx+1, shift the entire sequence back 1 608 | for (int i = best_idx+1; i < (*n_tokens-1); i++) { 609 | tokens[i] = tokens[i+1]; 610 | } 611 | (*n_tokens)--; // token length decreased 612 | } 613 | 614 | // add optional EOS (=2) token, if desired 615 | if (eos) tokens[(*n_tokens)++] = 2; 616 | 617 | free(str_buffer); 618 | } 619 | 620 | // ---------------------------------------------------------------------------- 621 | // The Sampler, which takes logits and returns a sampled token 622 | // sampling can be done in a few ways: greedy argmax, sampling, top-p sampling 623 | 624 | typedef struct { 625 | float prob; 626 | int index; 627 | } ProbIndex; // struct used when sorting probabilities during top-p sampling 628 | 629 | typedef struct { 630 | int vocab_size; 631 | ProbIndex* probindex; // buffer used in top-p sampling 632 | float temperature; 633 | float topp; 634 | unsigned long long rng_state; 635 | } Sampler; 636 | 637 | int sample_argmax(float* probabilities, int n) { 638 | // return the index that has the highest probability 639 | int max_i = 0; 640 | float max_p = probabilities[0]; 641 | for (int i = 1; i < n; i++) { 642 | if (probabilities[i] > max_p) { 643 | max_i = i; 644 | max_p = probabilities[i]; 645 | } 646 | } 647 | return max_i; 648 | } 649 | 650 | int sample_mult(float* probabilities, int n, float coin) { 651 | // sample index from probabilities (they must sum to 1!) 
652 | // coin is a random number in [0, 1), usually from random_f32() 653 | float cdf = 0.0f; 654 | for (int i = 0; i < n; i++) { 655 | cdf += probabilities[i]; 656 | if (coin < cdf) { 657 | return i; 658 | } 659 | } 660 | return n - 1; // in case of rounding errors 661 | } 662 | 663 | int compare(const void* a, const void* b) { 664 | ProbIndex* a_ = (ProbIndex*) a; 665 | ProbIndex* b_ = (ProbIndex*) b; 666 | if (a_->prob > b_->prob) return -1; 667 | if (a_->prob < b_->prob) return 1; 668 | return 0; 669 | } 670 | 671 | int sample_topp(float* probabilities, int n, float topp, ProbIndex* probindex, float coin) { 672 | // top-p sampling (or "nucleus sampling") samples from the smallest set of 673 | // tokens that exceed probability topp. This way we never sample tokens that 674 | // have very low probabilities and are less likely to go "off the rails". 675 | // coin is a random number in [0, 1), usually from random_f32() 676 | 677 | int n0 = 0; 678 | // quicksort indices in descending order of probabilities 679 | // values smaller than (1 - topp) / (n - 1) cannot be part of the result 680 | // so for efficiency we crop these out as candidates before sorting 681 | const float cutoff = (1.0f - topp) / (n - 1); 682 | for (int i = 0; i < n; i++) { 683 | if (probabilities[i] >= cutoff) { 684 | probindex[n0].index = i; 685 | probindex[n0].prob = probabilities[i]; 686 | n0++; 687 | } 688 | } 689 | qsort(probindex, n0, sizeof(ProbIndex), compare); 690 | 691 | // truncate the list where cumulative probability exceeds topp 692 | float cumulative_prob = 0.0f; 693 | int last_idx = n0 - 1; // in case of rounding errors consider all elements 694 | for (int i = 0; i < n0; i++) { 695 | cumulative_prob += probindex[i].prob; 696 | if (cumulative_prob > topp) { 697 | last_idx = i; 698 | break; // we've exceeded topp by including last_idx 699 | } 700 | } 701 | 702 | // sample from the truncated list 703 | float r = coin * cumulative_prob; 704 | float cdf = 0.0f; 705 | for (int i = 0; i 
<= last_idx; i++) { 706 | cdf += probindex[i].prob; 707 | if (r < cdf) { 708 | return probindex[i].index; 709 | } 710 | } 711 | return probindex[last_idx].index; // in case of rounding errors 712 | } 713 | 714 | void build_sampler(Sampler* sampler, int vocab_size, float temperature, float topp, unsigned long long rng_seed) { 715 | sampler->vocab_size = vocab_size; 716 | sampler->temperature = temperature; 717 | sampler->topp = topp; 718 | sampler->rng_state = rng_seed; 719 | // buffer only used with nucleus sampling; may not need but it's ~small 720 | sampler->probindex = malloc(sampler->vocab_size * sizeof(ProbIndex)); 721 | } 722 | 723 | void free_sampler(Sampler* sampler) { 724 | free(sampler->probindex); 725 | } 726 | 727 | unsigned int random_u32(unsigned long long *state) { 728 | // xorshift rng: https://en.wikipedia.org/wiki/Xorshift#xorshift.2A 729 | *state ^= *state >> 12; 730 | *state ^= *state << 25; 731 | *state ^= *state >> 27; 732 | return (*state * 0x2545F4914F6CDD1Dull) >> 32; 733 | } 734 | float random_f32(unsigned long long *state) { // random float32 in [0,1) 735 | return (random_u32(state) >> 8) / 16777216.0f; 736 | } 737 | 738 | int sample(Sampler* sampler, float* logits) { 739 | // sample the token given the logits and some hyperparameters 740 | int next; 741 | if (sampler->temperature == 0.0f) { 742 | // greedy argmax sampling: take the token with the highest probability 743 | next = sample_argmax(logits, sampler->vocab_size); 744 | } else { 745 | // apply the temperature to the logits 746 | for (int q=0; qvocab_size; q++) { logits[q] /= sampler->temperature; } 747 | // apply softmax to the logits to get the probabilities for next token 748 | softmax(logits, sampler->vocab_size); 749 | // flip a (float) coin (this is our source of entropy for sampling) 750 | float coin = random_f32(&sampler->rng_state); 751 | // we sample from this distribution to get the next token 752 | if (sampler->topp <= 0 || sampler->topp >= 1) { 753 | // simply sample 
from the predicted probability distribution 754 | next = sample_mult(logits, sampler->vocab_size, coin); 755 | } else { 756 | // top-p (nucleus) sampling, clamping the least likely tokens to zero 757 | next = sample_topp(logits, sampler->vocab_size, sampler->topp, sampler->probindex, coin); 758 | } 759 | } 760 | return next; 761 | } 762 | 763 | // ---------------------------------------------------------------------------- 764 | // utilities: time 765 | 766 | long time_in_ms() { 767 | // return time in milliseconds, for benchmarking the model speed 768 | struct timespec time; 769 | clock_gettime(CLOCK_REALTIME, &time); 770 | return time.tv_sec * 1000 + time.tv_nsec / 1000000; 771 | } 772 | 773 | // ---------------------------------------------------------------------------- 774 | // generation loop 775 | 776 | void generate(Transformer *transformer, Tokenizer *tokenizer, Sampler *sampler, char *prompt, int steps) { 777 | char *empty_prompt = ""; 778 | if (prompt == NULL) { prompt = empty_prompt; } 779 | 780 | // encode the (string) prompt into tokens sequence 781 | int num_prompt_tokens = 0; 782 | int* prompt_tokens = (int*)malloc((strlen(prompt)+3) * sizeof(int)); // +3 for '\0', ?BOS, ?EOS 783 | encode(tokenizer, prompt, 1, 0, prompt_tokens, &num_prompt_tokens); 784 | if (num_prompt_tokens < 1) { 785 | psvDebugScreenPrintf("something is wrong, expected at least 1 prompt token\n"); 786 | sceKernelExitProcess(0); 787 | } 788 | 789 | // start the main loop 790 | long start = 0; // used to time our code, only initialized after first iteration 791 | int next; // will store the next token in the sequence 792 | int token = prompt_tokens[0]; // kick off with the first token in the prompt 793 | int pos = 0; // position in the sequence 794 | 795 | psvDebugScreenPrintf("\n%s", prompt); 796 | 797 | while (pos < steps) { 798 | // forward the transformer to get logits for the next token 799 | float* logits = forward(transformer, token, pos); 800 | 801 | // advance the state 
/*
 * Print `guide` as a prompt and read one line from stdin into `buffer`.
 *
 * guide:   prompt text written to stdout before reading.
 * buffer:  destination for the line; at most bufsize-1 characters are
 *          stored, followed by a terminating '\0'. A trailing '\n' is
 *          stripped.
 * bufsize: capacity of `buffer` in bytes.
 *
 * On EOF or a read error `buffer` is set to the empty string, so the
 * caller never sees stale or uninitialized contents. A NULL buffer or a
 * zero bufsize is ignored (nothing is read).
 */
void read_stdin(const char* guide, char* buffer, size_t bufsize) {
    printf("%s", guide);

    if (buffer == NULL || bufsize == 0) {
        return; // nowhere to store even the terminator
    }

    if (fgets(buffer, (int)bufsize, stdin) == NULL) {
        // fgets leaves the buffer untouched at EOF and indeterminate on
        // error; normalise both cases to an empty string
        buffer[0] = '\0';
        return;
    }

    size_t len = strlen(buffer);
    if (len > 0 && buffer[len - 1] == '\n') {
        buffer[len - 1] = '\0'; // strip newline
    }
}
model_file = fopen(suites[i].model_path, "rb"); 859 | int is_downloaded = 0; 860 | if(model_file){ 861 | fclose(model_file); 862 | FILE* tokenizer_file = fopen(suites[i].tokenizer_path, "rb"); 863 | if(tokenizer_file){ 864 | fclose(tokenizer_file); 865 | is_downloaded = 1; 866 | } 867 | } 868 | 869 | if (i == selection) { 870 | psvDebugScreenPrintf("-> "); 871 | } else { 872 | psvDebugScreenPrintf(" "); 873 | } 874 | 875 | if (is_downloaded) { 876 | psvDebugScreenPrintf(COLOR_GREEN "%s (Downloaded)\n" COLOR_RESET, suites[i].name); 877 | } else { 878 | psvDebugScreenPrintf(COLOR_YELLOW "%s (Not Downloaded)\n" COLOR_RESET, suites[i].name); 879 | } 880 | } 881 | psvDebugScreenPrintf(COLOR_YELLOW "\nPress X to manage, O to go back.\n" COLOR_RESET); 882 | 883 | while(1) { 884 | sceCtrlPeekBufferPositive(0, &pad, 1); 885 | uint32_t pressed = pad.buttons & ~old_buttons; 886 | old_buttons = pad.buttons; 887 | 888 | if (pressed & SCE_CTRL_UP) { 889 | selection = (selection > 0) ? selection - 1 : count - 1; 890 | break; 891 | } 892 | if (pressed & SCE_CTRL_DOWN) { 893 | selection = (selection < count - 1) ? 
selection + 1 : 0; 894 | break; 895 | } 896 | if (pressed & SCE_CTRL_CIRCLE) { 897 | return; 898 | } 899 | if (pressed & SCE_CTRL_CROSS) { 900 | FILE* model_file = fopen(suites[selection].model_path, "rb"); 901 | int is_downloaded = 0; 902 | if(model_file){ 903 | fclose(model_file); 904 | FILE* tokenizer_file = fopen(suites[selection].tokenizer_path, "rb"); 905 | if(tokenizer_file){ 906 | fclose(tokenizer_file); 907 | is_downloaded = 1; 908 | } 909 | } 910 | 911 | if (is_downloaded) { 912 | psvDebugScreenClear(0xFF000000); 913 | psvDebugScreenSetCoordsXY(&x, &y); 914 | psvDebugScreenPrintf(COLOR_RED "Delete %s and its tokenizer?\n\n" COLOR_RESET, suites[selection].name); 915 | psvDebugScreenPrintf(COLOR_YELLOW "Press X to confirm, O to cancel.\n" COLOR_RESET); 916 | 917 | while(1) { 918 | uint32_t confirm_pressed = 0; 919 | sceCtrlPeekBufferPositive(0, &pad, 1); 920 | confirm_pressed = pad.buttons & ~old_buttons; 921 | old_buttons = pad.buttons; 922 | 923 | if (confirm_pressed & SCE_CTRL_CIRCLE) { 924 | break; 925 | } 926 | if (confirm_pressed & SCE_CTRL_CROSS) { 927 | sceIoRemove(suites[selection].model_path); 928 | sceIoRemove(suites[selection].tokenizer_path); 929 | psvDebugScreenClear(0xFF000000); 930 | psvDebugScreenSetCoordsXY(&x, &y); 931 | psvDebugScreenPrintf(COLOR_GREEN "Model deleted.\n" COLOR_RESET); 932 | sceKernelDelayThread(1 * 1000 * 1000); 933 | break; 934 | } 935 | sceKernelDelayThread(16 * 1000); 936 | } 937 | } else { 938 | psvDebugScreenClear(0xFF000000); 939 | psvDebugScreenSetCoordsXY(&x, &y); 940 | psvDebugScreenPrintf(COLOR_CYAN "Download %s and its tokenizer?\n\n" COLOR_RESET, suites[selection].name); 941 | psvDebugScreenPrintf(COLOR_YELLOW "Press X to confirm, O to cancel.\n" COLOR_RESET); 942 | 943 | while(1) { 944 | uint32_t confirm_pressed = 0; 945 | sceCtrlPeekBufferPositive(0, &pad, 1); 946 | confirm_pressed = pad.buttons & ~old_buttons; 947 | old_buttons = pad.buttons; 948 | 949 | if (confirm_pressed & SCE_CTRL_CIRCLE) { 950 | 
break; 951 | } 952 | if (confirm_pressed & SCE_CTRL_CROSS) { 953 | psvDebugScreenClear(0xFF000000); 954 | psvDebugScreenPrintf(COLOR_CYAN "Downloading %s...\n" COLOR_RESET, suites[selection].name); 955 | download_file(suites[selection].model_url, suites[selection].model_path); 956 | 957 | psvDebugScreenPrintf(COLOR_CYAN "Downloading tokenizer %s...\n" COLOR_RESET, suites[selection].tokenizer_name); 958 | download_file(suites[selection].tokenizer_url, suites[selection].tokenizer_path); 959 | 960 | psvDebugScreenPrintf(COLOR_GREEN "\nDownloads finished. Press X to continue.\n" COLOR_RESET); 961 | while(1){ 962 | uint32_t continue_pressed = 0; 963 | sceCtrlPeekBufferPositive(0, &pad, 1); 964 | continue_pressed = pad.buttons & ~old_buttons; 965 | old_buttons = pad.buttons; 966 | if(continue_pressed & SCE_CTRL_CROSS) break; 967 | sceKernelDelayThread(16 * 1000); 968 | } 969 | break; 970 | } 971 | sceKernelDelayThread(16 * 1000); 972 | } 973 | } 974 | break; 975 | } 976 | sceKernelDelayThread(16 * 1000); 977 | } 978 | } 979 | } 980 | 981 | void netInit() { 982 | sceSysmoduleLoadModule(SCE_SYSMODULE_NET); 983 | SceNetInitParam netInitParam; 984 | int size = 4 * 1024 * 1024; 985 | netInitParam.memory = malloc(size); 986 | netInitParam.size = size; 987 | netInitParam.flags = 0; 988 | sceNetInit(&netInitParam); 989 | sceNetCtlInit(); 990 | } 991 | 992 | void httpInit() { 993 | sceSysmoduleLoadModule(SCE_SYSMODULE_HTTP); 994 | sceHttpInit(4 * 1024 * 1024); 995 | } 996 | 997 | static size_t write_data_to_disk(void *ptr, size_t size, size_t nmemb, void *stream) { 998 | return sceIoWrite(*(int*)stream, ptr, size * nmemb); 999 | } 1000 | 1001 | int progress_callback(void *clientp, curl_off_t dltotal, curl_off_t dlnow, curl_off_t ultotal, curl_off_t ulnow) { 1002 | // Only show progress for transfers over 10KB to filter out small HTTP responses (e.g. 
redirects) 1003 | if (dltotal > 10000) { 1004 | float percent = (float)dlnow / (float)dltotal * 100.0f; 1005 | psvDebugScreenPrintf("\rDownloading... %6.2f%%", percent); 1006 | } 1007 | return 0; 1008 | } 1009 | 1010 | void download_file(const char* url, const char* filepath) { 1011 | CURL *curl; 1012 | CURLcode res; 1013 | int file_fd = sceIoOpen(filepath, SCE_O_WRONLY | SCE_O_CREAT | SCE_O_TRUNC, 0777); 1014 | 1015 | if (file_fd < 0) { 1016 | psvDebugScreenPrintf(COLOR_RED "\nError opening file for writing: %s\n" COLOR_RESET, filepath); 1017 | return; 1018 | } 1019 | 1020 | curl = curl_easy_init(); 1021 | if (curl) { 1022 | curl_easy_setopt(curl, CURLOPT_URL, url); 1023 | curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_data_to_disk); 1024 | curl_easy_setopt(curl, CURLOPT_WRITEDATA, &file_fd); 1025 | curl_easy_setopt(curl, CURLOPT_XFERINFOFUNCTION, progress_callback); 1026 | curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L); 1027 | curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); 1028 | curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L); 1029 | curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0L); 1030 | 1031 | res = curl_easy_perform(curl); 1032 | psvDebugScreenPrintf("\n"); 1033 | 1034 | if (res != CURLE_OK) { 1035 | psvDebugScreenPrintf(COLOR_RED "Download failed: %s\n" COLOR_RESET, curl_easy_strerror(res)); 1036 | } else { 1037 | psvDebugScreenPrintf(COLOR_GREEN "Download successful!\n" COLOR_RESET); 1038 | } 1039 | curl_easy_cleanup(curl); 1040 | } 1041 | sceIoClose(file_fd); 1042 | } 1043 | 1044 | void display_download_menu() { 1045 | SceCtrlData pad; 1046 | // wait for button release 1047 | while(sceCtrlPeekBufferPositive(0, &pad, 1), pad.buttons) { sceKernelDelayThread(16 * 1000); } 1048 | uint32_t old_buttons = 0; 1049 | int selection = 0; 1050 | int x = 0, y = 0; 1051 | const int num_options = 3; 1052 | const char* options[] = {"Download 15M Model", "Download 260K Model", "Back"}; 1053 | 1054 | ModelSuite suites[] = { 1055 | {"ux0:data/stories15M.bin", 
"ux0:data/tokenizer.bin", "stories15M", "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.bin", "https://raw.githubusercontent.com/karpathy/llama2.c/master/tokenizer.bin", "tokenizer.bin"}, 1056 | {"ux0:data/stories260K.bin", "ux0:data/tok512.bin", "stories260K", "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/stories260K.bin", "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/tok512.bin", "tok512.bin"} 1057 | }; 1058 | 1059 | while(1) { 1060 | psvDebugScreenClear(0xFF000000); 1061 | psvDebugScreenSetCoordsXY(&x, &y); 1062 | psvDebugScreenPrintf(COLOR_CYAN "No models found. Would you like to download them?\n\n" COLOR_RESET); 1063 | for(int i=0; i < num_options; ++i) { 1064 | if(i == selection) { 1065 | psvDebugScreenPrintf("-> " COLOR_GREEN "%s\n" COLOR_RESET, options[i]); 1066 | } else { 1067 | psvDebugScreenPrintf(" %s\n", options[i]); 1068 | } 1069 | } 1070 | psvDebugScreenPrintf(COLOR_YELLOW "\nPress X to confirm, UP/DOWN to navigate.\n" COLOR_RESET); 1071 | 1072 | while(1) { 1073 | sceCtrlPeekBufferPositive(0, &pad, 1); 1074 | uint32_t pressed = pad.buttons & ~old_buttons; 1075 | old_buttons = pad.buttons; 1076 | 1077 | if(pressed & SCE_CTRL_UP) { 1078 | selection = (selection > 0) ? selection - 1 : num_options - 1; 1079 | break; 1080 | } 1081 | if(pressed & SCE_CTRL_DOWN) { 1082 | selection = (selection < num_options - 1) ? selection + 1 : 0; 1083 | break; 1084 | } 1085 | if(pressed & SCE_CTRL_CROSS) { 1086 | if(selection == 2) { 1087 | psvDebugScreenPrintf(COLOR_RED "No models to load. 
Exiting.\n" COLOR_RESET); 1088 | sceKernelDelayThread(2 * 1000 * 1000); 1089 | sceKernelExitProcess(0); 1090 | } 1091 | 1092 | psvDebugScreenClear(0xFF000000); 1093 | psvDebugScreenPrintf(COLOR_CYAN "Downloading %s...\n" COLOR_RESET, suites[selection].name); 1094 | download_file(suites[selection].model_url, suites[selection].model_path); 1095 | 1096 | psvDebugScreenPrintf(COLOR_CYAN "Downloading tokenizer %s...\n" COLOR_RESET, suites[selection].tokenizer_name); 1097 | download_file(suites[selection].tokenizer_url, suites[selection].tokenizer_path); 1098 | 1099 | psvDebugScreenPrintf(COLOR_GREEN "\nDownloads finished. Press X to continue.\n" COLOR_RESET); 1100 | while(1){ 1101 | uint32_t confirm_pressed = 0; 1102 | sceCtrlPeekBufferPositive(0, &pad, 1); 1103 | confirm_pressed = pad.buttons & ~old_buttons; 1104 | old_buttons = pad.buttons; 1105 | if(confirm_pressed & SCE_CTRL_CROSS) break; 1106 | sceKernelDelayThread(16 * 1000); 1107 | } 1108 | return; 1109 | } 1110 | sceKernelDelayThread(16*1000); 1111 | } 1112 | } 1113 | } 1114 | 1115 | void display_model_selection_menu(ModelSuite* suites, int count, int* selected_index, int* menu_action) { 1116 | SceCtrlData pad; 1117 | // wait for button release 1118 | while(sceCtrlPeekBufferPositive(0, &pad, 1), pad.buttons) { sceKernelDelayThread(16 * 1000); } 1119 | uint32_t old_buttons = 0; 1120 | int current_selection = 0; 1121 | int x = 0, y = 0; 1122 | const int num_options = count + 1; 1123 | 1124 | while (1) { 1125 | psvDebugScreenClear(0xFF000000); 1126 | psvDebugScreenSetCoordsXY(&x, &y); 1127 | psvDebugScreenPrintf(COLOR_CYAN "Select a model:\n\n" COLOR_RESET); 1128 | 1129 | for (int i = 0; i < count; i++) { 1130 | if (i == current_selection) { 1131 | psvDebugScreenPrintf("-> " COLOR_GREEN "%s\n" COLOR_RESET, suites[i].name); 1132 | } else { 1133 | psvDebugScreenPrintf(" %s\n", suites[i].name); 1134 | } 1135 | } 1136 | 1137 | if (current_selection == count) { 1138 | psvDebugScreenPrintf("-> " COLOR_YELLOW "%s\n" 
COLOR_RESET, "Manage local models..."); 1139 | } else { 1140 | psvDebugScreenPrintf(" %s\n", "Manage local models..."); 1141 | } 1142 | 1143 | psvDebugScreenPrintf(COLOR_YELLOW "\nPress X to confirm, UP/DOWN to navigate.\n" COLOR_RESET); 1144 | 1145 | while (1) { 1146 | sceCtrlPeekBufferPositive(0, &pad, 1); 1147 | uint32_t pressed_buttons = pad.buttons & ~old_buttons; 1148 | old_buttons = pad.buttons; 1149 | 1150 | if (pressed_buttons & SCE_CTRL_DOWN) { 1151 | current_selection = (current_selection < num_options - 1) ? current_selection + 1 : 0; 1152 | break; 1153 | } 1154 | if (pressed_buttons & SCE_CTRL_UP) { 1155 | current_selection = (current_selection > 0) ? current_selection - 1 : num_options - 1; 1156 | break; 1157 | } 1158 | if (pressed_buttons & SCE_CTRL_CROSS) { 1159 | if(current_selection == count) { 1160 | *menu_action = 1; // Manage models 1161 | } else { 1162 | *menu_action = 0; // Load model 1163 | *selected_index = current_selection; 1164 | } 1165 | return; 1166 | } 1167 | sceKernelDelayThread(16 * 1000); 1168 | } 1169 | } 1170 | } 1171 | 1172 | int main(int argc, char *argv[]) { 1173 | psvDebugScreenInit(); 1174 | PsvDebugScreenFont* current_font = psvDebugScreenGetFont(); 1175 | PsvDebugScreenFont* scaled_font = psvDebugScreenScaleFont2x(current_font); 1176 | psvDebugScreenSetFont(scaled_font); 1177 | psvDebugScreenPrintf(COLOR_CYAN "Application started.\n" COLOR_RESET); 1178 | netInit(); 1179 | httpInit(); 1180 | 1181 | while (1) { 1182 | ModelSuite all_suites[] = { 1183 | {"ux0:data/stories15M.bin", "ux0:data/tokenizer.bin", "stories15M", "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.bin", "https://raw.githubusercontent.com/karpathy/llama2.c/master/tokenizer.bin", "tokenizer.bin"}, 1184 | {"ux0:data/stories260K.bin", "ux0:data/tok512.bin", "stories260K", "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/stories260K.bin", "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/tok512.bin", 
"tok512.bin"} 1185 | }; 1186 | ModelSuite found_suites[4]; 1187 | int found_count = 0; 1188 | 1189 | for (int i = 0; i < sizeof(all_suites) / sizeof(ModelSuite); i++) { 1190 | FILE* model_file = fopen(all_suites[i].model_path, "rb"); 1191 | if (model_file) { 1192 | fclose(model_file); 1193 | FILE* tokenizer_file = fopen(all_suites[i].tokenizer_path, "rb"); 1194 | if (tokenizer_file) { 1195 | fclose(tokenizer_file); 1196 | found_suites[found_count++] = all_suites[i]; 1197 | } 1198 | } 1199 | } 1200 | 1201 | char* checkpoint_path = NULL; 1202 | char* tokenizer_path = NULL; 1203 | 1204 | if (found_count == 0) { 1205 | display_download_menu(); 1206 | continue; 1207 | } 1208 | 1209 | int menu_action = 0; 1210 | int selected_index = 0; 1211 | display_model_selection_menu(found_suites, found_count, &selected_index, &menu_action); 1212 | 1213 | if (menu_action == 1) { 1214 | display_manage_models_menu(all_suites, sizeof(all_suites) / sizeof(ModelSuite)); 1215 | continue; 1216 | } 1217 | 1218 | checkpoint_path = found_suites[selected_index].model_path; 1219 | tokenizer_path = found_suites[selected_index].tokenizer_path; 1220 | 1221 | float temperature = 1.0f; 1222 | float topp = 0.9f; 1223 | int steps = 0; 1224 | char *prompt = "Once upon a time"; 1225 | unsigned long long rng_seed = (unsigned int)time(NULL); 1226 | 1227 | psvDebugScreenClear(0xFF000000); 1228 | int x = 0, y = 0; 1229 | psvDebugScreenSetCoordsXY(&x, &y); 1230 | Transformer transformer; 1231 | build_transformer(&transformer, checkpoint_path); 1232 | if (steps == 0 || steps > transformer.config.seq_len) { 1233 | steps = transformer.config.seq_len; 1234 | } 1235 | 1236 | Tokenizer tokenizer; 1237 | build_tokenizer(&tokenizer, tokenizer_path, transformer.config.vocab_size); 1238 | Sampler sampler; 1239 | build_sampler(&sampler, transformer.config.vocab_size, temperature, topp, rng_seed); 1240 | psvDebugScreenClear(0xFF000000); 1241 | psvDebugScreenSetCoordsXY(&x, &y); 1242 | 1243 | int main_loop_action = 0; 
1244 | 1245 | while(1){ 1246 | psvDebugScreenPrintf(COLOR_YELLOW "\nPress the X button to start generation.\n" COLOR_RESET); 1247 | uint32_t old_buttons = 0; 1248 | while(1){ 1249 | SceCtrlData pad; 1250 | sceCtrlPeekBufferPositive(0, &pad, 1); 1251 | uint32_t pressed = pad.buttons & ~old_buttons; 1252 | old_buttons = pad.buttons; 1253 | if(pressed & SCE_CTRL_CROSS) 1254 | break; 1255 | sceKernelDelayThread(16 * 1000); 1256 | } 1257 | 1258 | psvDebugScreenClear(0xFF000000); 1259 | psvDebugScreenSetCoordsXY(&x, &y); 1260 | generate(&transformer, &tokenizer, &sampler, prompt, steps); 1261 | psvDebugScreenPrintf(COLOR_GREEN "Text generation finished.\n" COLOR_RESET); 1262 | 1263 | psvDebugScreenPrintf(COLOR_YELLOW "\nPress SQUARE for a new story\n" COLOR_RESET); 1264 | psvDebugScreenPrintf(COLOR_YELLOW "Press CIRCLE to change model\n" COLOR_RESET); 1265 | psvDebugScreenPrintf(COLOR_YELLOW "Press X to exit.\n" COLOR_RESET); 1266 | 1267 | int story_loop_action = 0; 1268 | old_buttons = 0; 1269 | while(1) { 1270 | SceCtrlData pad; 1271 | sceCtrlPeekBufferPositive(0, &pad, 1); 1272 | uint32_t pressed_buttons = pad.buttons & ~old_buttons; 1273 | old_buttons = pad.buttons; 1274 | 1275 | if (pressed_buttons & SCE_CTRL_SQUARE) { 1276 | story_loop_action = 0; 1277 | break; 1278 | } 1279 | if (pressed_buttons & SCE_CTRL_CIRCLE) { 1280 | story_loop_action = 1; 1281 | break; 1282 | } 1283 | if (pressed_buttons & SCE_CTRL_CROSS) { 1284 | story_loop_action = 2; 1285 | break; 1286 | } 1287 | sceKernelDelayThread(16 * 1000); 1288 | } 1289 | 1290 | if (story_loop_action == 0) { 1291 | psvDebugScreenClear(0xFF000000); 1292 | psvDebugScreenSetCoordsXY(&x, &y); 1293 | continue; 1294 | } else { 1295 | if (story_loop_action == 2) main_loop_action = 1; 1296 | break; 1297 | } 1298 | } 1299 | 1300 | free_sampler(&sampler); 1301 | free_tokenizer(&tokenizer); 1302 | free_transformer(&transformer); 1303 | 1304 | if (main_loop_action == 1) { 1305 | break; 1306 | } 1307 | } 1308 | 1309 | 
psvDebugScreenPrintf(COLOR_CYAN "\nCleanup finished. Exiting.\n" COLOR_RESET); 1310 | sceKernelDelayThread(2 * 1000 * 1000); 1311 | sceKernelExitProcess(0); 1312 | return 0; 1313 | } 1314 | --------------------------------------------------------------------------------