├── sce_sys
│   ├── icon0.png
│   └── livearea
│       └── contents
│           ├── bg.png
│           ├── startup.png
│           └── template.xml
├── .gitignore
├── common
│   ├── debugScreen_custom.h
│   ├── debugScreenFont.builder.html
│   ├── debugScreen.h
│   ├── debugScreenFont.c
│   └── debugScreen.c
├── README.md
├── CMakeLists.txt
├── fetch_models.py
├── Makefile
└── vita_llm.c
/sce_sys/icon0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/callbacked/psvita-llm/HEAD/sce_sys/icon0.png
--------------------------------------------------------------------------------
/sce_sys/livearea/contents/bg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/callbacked/psvita-llm/HEAD/sce_sys/livearea/contents/bg.png
--------------------------------------------------------------------------------
/sce_sys/livearea/contents/startup.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/callbacked/psvita-llm/HEAD/sce_sys/livearea/contents/startup.png
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | CMakeFiles/
2 | CMakeCache.txt
3 | *.self
4 | *.self.out
5 | *.velf
6 | *.vpk_param.sfo
7 | *.vpk
8 | *.vpk.out
9 | *.png
10 | *.cmake
11 | psvita-llm
12 | /build
13 | .DS_Store
14 |
15 |
--------------------------------------------------------------------------------
/sce_sys/livearea/contents/template.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | bg.png
6 |
7 |
8 |
9 | startup.png
10 |
11 |
12 |
--------------------------------------------------------------------------------
/common/debugScreen_custom.h:
--------------------------------------------------------------------------------
#ifndef DEBUG_SCREEN_CUSTOM_H
#define DEBUG_SCREEN_CUSTOM_H

/*
 * Project-level customisation of the debug screen: optional overrides plus
 * compatibility macros for sources based on older Vita SDK versions.
 *
 * The colour macros below expand to calls of psvDebugScreenPrintf() and
 * psvDebugScreenPuts() and use uint32_t, so those names (and <stdint.h>) must
 * be visible wherever the macros are expanded.
 */

//#define SCREEN_TAB_SIZE (8)

// backward compatibility for sources based on older Vita SDK versions
//#define DEBUG_SCREEN_CODE_INCLUDE // not recommended for your own projects, but for sake of backward compatibility

/*
 * Set the foreground / background colour from a 0xRRGGBB value by printing
 * the escape sequences ESC[38;2;R;G;Bm and ESC[48;2;R;G;Bm respectively.
 *
 * - Each component is cast to unsigned long so that it matches "%lu" on every
 *   host; uint32_t is not unsigned long everywhere.
 * - "\033" is the portable spelling of the ESC byte ("\e" is a compiler
 *   extension); the emitted bytes are unchanged.
 * - NOTE: `rgb` is evaluated three times, so do not pass an expression with
 *   side effects.
 */
#define psvDebugScreenSetFgColor(rgb) \
    psvDebugScreenPrintf("\033[38;2;%lu;%lu;%lum", \
        (unsigned long)(((uint32_t)(rgb) >> 16) & 0xFF), \
        (unsigned long)(((uint32_t)(rgb) >> 8) & 0xFF), \
        (unsigned long)((uint32_t)(rgb) & 0xFF))
#define psvDebugScreenSetBgColor(rgb) \
    psvDebugScreenPrintf("\033[48;2;%lu;%lu;%lum", \
        (unsigned long)(((uint32_t)(rgb) >> 16) & 0xFF), \
        (unsigned long)(((uint32_t)(rgb) >> 8) & 0xFF), \
        (unsigned long)((uint32_t)(rgb) & 0xFF))

/*
 * Set the background colour to `rgb`, then print ESC[H ESC[2J.
 * Wrapped in do { } while (0) so the two calls behave as ONE statement, e.g.
 * `if (cond) psvDebugScreenClear(c);` no longer runs the second call
 * unconditionally. Use it as a statement, terminated by a semicolon.
 */
#define psvDebugScreenClear(rgb) \
    do { \
        psvDebugScreenSetBgColor(rgb); \
        psvDebugScreenPuts("\033[H\033[2J"); \
    } while (0)

// custom changes for non-Vita builds
#ifndef __vita__
// replacement init for non-Vita builds: switches stdout to unbuffered mode
#define psvDebugScreenInitReplacement(...) setvbuf(stdout,NULL,_IONBF,0)
#endif

#endif
18 |
--------------------------------------------------------------------------------
/common/debugScreenFont.builder.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
11 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
--------------------------------------------------------------------------------
/common/debugScreen.h:
--------------------------------------------------------------------------------
1 | #ifndef DEBUG_SCREEN_H
2 | #define DEBUG_SCREEN_H
3 |
4 | #include "debugScreen_custom.h"
5 |
/*
 * Colour state of the debug screen. A copy can be obtained through
 * psvDebugScreenGetColorStateCopy().
 */
typedef struct ColorState {
    /* --- mode flags: truecolors vs. ANSI/VTERM/GREYSCALE colors --- */
    int fgTrueColorFlag;              /* foreground: which colour mode is used */
    int bgTrueColorFlag;              /* background: which colour mode is used */

    /* --- truecolors: RGB (internal BGR) --- */
    uint32_t fgTrueColor;
    uint32_t bgTrueColor;

    /* --- ANSI/VTERM/GREYSCALE colors --- */
    unsigned char fgIndex;            /* colour code (0-255) */
    unsigned char fgIntensity;        /* 22=normal, 1=increased ("bright"), 2=decreased ("dark") */
    unsigned char bgIndex;            /* colour code (0-255) */
    unsigned char bgIntensity;        /* 22=normal, 1=increased ("bright") */
    int inversion;                    /* flag if bg/fg colors are inverted */

    /* --- defaults (ANSI/VTERM/GREYSCALE) --- */
    unsigned char fgIndexDefault;     /* default colour code */
    unsigned char fgIntensityDefault; /* 22=normal, 1=increased, 2=decreased */
    unsigned char bgIndexDefault;     /* default colour code */
    unsigned char bgIntensityDefault; /* 22=normal, 1=increased */
    int inversionDefault;             /* flag if bg/fg colors are inverted */

    /* --- current colors (e.g. inverted): RGB (internal BGR) --- */
    uint32_t color_fg;
    uint32_t color_bg;
} ColorState;
30 |
/*
 * Font descriptor used by psvDebugScreenGetFont() / psvDebugScreenSetFont().
 * All numeric members are single bytes, i.e. only values 0-255.
 * NOTE(review): member meanings are inferred from their names only; confirm
 * against debugScreen.c.
 */
typedef struct PsvDebugScreenFont {
    unsigned char *glyphs; /* glyph data bytes */
    unsigned char width;
    unsigned char height;
    unsigned char first;
    unsigned char last;
    unsigned char size_w;
    unsigned char size_h;
} PsvDebugScreenFont;
34 |
35 | #define SCREEN_WIDTH (960) // screen resolution x
36 | #define SCREEN_HEIGHT (544) // screen resolution y
37 |
38 | #ifdef DEBUG_SCREEN_CODE_INCLUDE // not recommended for your own projects, but for sake of backward compatibility
39 | #include "debugScreen.c"
40 | #else
41 | #ifdef __cplusplus
42 | extern "C" {
43 | #endif
44 | int psvDebugScreenInit();
45 | int psvDebugScreenFinish();
46 | int psvDebugScreenPuts(const char * _text);
47 | int psvDebugScreenPrintf(const char *format, ...);
48 | void psvDebugScreenGetColorStateCopy(ColorState *copy);
49 | void psvDebugScreenGetCoordsXY(int *x, int *y);
50 | void psvDebugScreenSetCoordsXY(int *x, int *y);
51 | PsvDebugScreenFont *psvDebugScreenGetFont(void);
52 | PsvDebugScreenFont *psvDebugScreenSetFont(PsvDebugScreenFont *font);
53 | PsvDebugScreenFont *psvDebugScreenScaleFont2x(PsvDebugScreenFont *source_font);
54 | #ifdef __cplusplus
55 | }
56 | #endif
57 | #endif
58 |
59 | #endif /* DEBUG_SCREEN_H */
60 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # PSVita-LLM
2 |
3 | > “Your scientists were so preoccupied with whether or not they could, they didn't stop to think if they should.”
4 |
5 | After blowing the dust off my PS Vita to play *LittleBigPlanet*, a stray question popped up:
6 |
7 | **Could it run an LLM?**
8 |
9 | Turns out it can! **PSVita-LLM** runs a modified version of `llama2.c` to load and infer the TinyStories 260K and 15M checkpoints right on the Vita.
10 |
11 |
12 | | Model | Parameters | File size | Inference speed (PCH-1000 Overclocked @ 555 MHz) | Results |
13 | |-------|------------|-----------|---------------------------------------------------|---------|
14 | | TinyStories-260K | 0.26 M | 1 MB | ≈ 120 tok/s | |
15 | | TinyStories-15M | 15 M | 60 MB | ≈ 1.8 tok/s | |
16 |
17 |
18 | ## Features
19 |
20 | - **Interactive Model Selector:** On startup, if the app detects that no models have been downloaded, it prompts the user to download one before starting.
21 | - **Full "Game Loop":** After a story is generated, you can choose to generate another, return to the model selection screen to switch/manage models, or exit the app completely.
22 |
23 | ## How to Use
24 |
25 | 1. **Install the VPK:** Transfer the `psvita-llm.vpk` file to your Vita and install it using VitaShell.
26 | 2. **Download Models & Tokenizers:** Upon first boot, the program will give you the models available to download. You can delete models and download any other model after that in the "Manage local models.." menu.
27 | 3. **Enjoy!**
28 |
29 |
30 | ## Building from Source
31 |
32 | To build this project yourself, you will need a working [VitaSDK](https://vitasdk.org/) installation.
33 |
34 | Once the SDK is configured, clone the repository and run:
35 |
36 | ```bash
37 | cmake .
38 | ```
39 |
40 | This project would not be possible without Andrej Karpathy's foundational work on `llama2.c`. What was done was more of a port job if anything, but a great way to learn!
41 |
42 | ### Future Improvements
43 |
44 | Some notes that I want to leave in before I forget
45 |
46 | * **Breaking up some code:** Having everything in a single file like the original llama2.c is pretty cool, but I should have separated out the networking code. It could be reused in other projects that download files or make curl calls over the internet, so it would be a good reference to have.
47 |
48 | * **Multithreading:** The `#pragma omp` directives in the current code are commented out because OpenMP does not play nicely with the Vita's CPU; leaving them enabled led to crashes during token generation. A significant performance boost could probably be gained by implementing a native multithreading solution with the SDK's `SceThreadMgr` library (?), especially for parallelizing the `matmul` ops in the transformer's forward pass. **For now this is all single threaded**.
49 |
50 | But I'm out of my depth in terms of development with the SDK, though it is something worth considering should I give this project another look.
51 |
52 |
53 |
54 |
55 |
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
## CMake build description for the PSVita-LLM app (VitaSDK target)
cmake_minimum_required(VERSION 3.16)

## Toolchain selection -- has to run before project().
# When no -DCMAKE_TOOLCHAIN_FILE=... was given, fall back to
# $VITASDK/share/vita.toolchain.cmake so the flag never has to be typed by
# hand. Configuration aborts if the VITASDK environment variable is not set.
if(NOT DEFINED CMAKE_TOOLCHAIN_FILE)
  if(NOT DEFINED ENV{VITASDK})
    message(FATAL_ERROR "Please define VITASDK to point to your SDK path!")
  endif()
  set(CMAKE_TOOLCHAIN_FILE "$ENV{VITASDK}/share/vita.toolchain.cmake" CACHE PATH "toolchain file")
endif()

## Project definition
project(psvita-llm)
# Vita helper macros; must come after project() so that the Vita specific
# artifacts (self/vpk) can be built.
include("${VITASDK}/share/vita.cmake" REQUIRED)

## App metadata
# Name displayed under the bubble in LiveArea.
set(VITA_APP_NAME "PSVita LLM")
# Title ID: exactly 9 characters. Recommended layout is XXXXYYYYY, where X is
# a string unique to the developer and Y a number unique to this app.
set(VITA_TITLEID "LLMA00001")
# Optional version string for LiveArea's "more info" screen.
set(VITA_VERSION "01.00")

## Compiler flags (appended, so flags set earlier are kept)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wno-unused-function -Wno-unused-variable -mcpu=cortex-a9 -mfloat-abi=hard -mfpu=neon -O3 -Ofast -fno-signed-zeros -fno-trapping-math -fassociative-math -freciprocal-math -fopenmp")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
# Optional: extra param.sfo flags are passed this way.
set(VITA_MKSFOEX_FLAGS "${VITA_MKSFOEX_FLAGS} -d PARENTAL_LEVEL=1")

## External packages
find_package(CURL REQUIRED)
find_package(OpenSSL REQUIRED)

## Search paths
# Additional include directories.
include_directories(common)
# Additional library directories; ${CMAKE_CURRENT_BINARY_DIR} makes any
# library that is currently being built available.
link_directories(${CMAKE_CURRENT_BINARY_DIR})

## Build and link
# Every source file that has to be compiled is listed here.
add_executable(${PROJECT_NAME}
  vita_llm.c
  common/debugScreen.c
)

# Libraries to link against (without the -l prefix); mostly stubs.
target_link_libraries(${PROJECT_NAME}
  m
  SceCtrl_stub
  SceSysmodule_stub
  SceNet_stub
  SceNetCtl_stub
  SceHttp_stub
  SceSsl_stub
  SceDisplay_stub # This used by debugScreenPrintf()
  ${CURL_LIBRARIES}
  ${OPENSSL_LIBRARIES}
)

## Vita artifacts
vita_create_self(${PROJECT_NAME}.self ${PROJECT_NAME})
# FILE <src_path> <dst_path_in_vpk> adds an extra file to the VPK; the
# LiveArea assets are packaged this way.
vita_create_vpk(${PROJECT_NAME}.vpk ${VITA_TITLEID} ${PROJECT_NAME}.self
  VERSION ${VITA_VERSION}
  NAME ${VITA_APP_NAME}
  FILE sce_sys/icon0.png sce_sys/icon0.png
  FILE sce_sys/livearea/contents/bg.png sce_sys/livearea/contents/bg.png
  FILE sce_sys/livearea/contents/startup.png sce_sys/livearea/contents/startup.png
  FILE sce_sys/livearea/contents/template.xml sce_sys/livearea/contents/template.xml
)
86 |
--------------------------------------------------------------------------------
/fetch_models.py:
--------------------------------------------------------------------------------
1 | import urllib.request
2 | import os
3 | import sys
4 |
# Files to fetch, grouped by model suite. (This script dates from before the
# app could download models itself: it fetches them to the computer so they
# can be transferred to the Vita over FTP.)
MODELS = {
    suite: [{"name": name, "url": url} for name, url in files]
    for suite, files in (
        (
            "260K",
            (
                ("stories260K.bin", "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/stories260K.bin"),
                ("tok512.bin", "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/tok512.bin"),
            ),
        ),
        (
            "15M",
            (
                ("stories15M.bin", "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.bin"),
                ("tokenizer.bin", "https://raw.githubusercontent.com/karpathy/llama2.c/master/tokenizer.bin"),
            ),
        ),
    )
}
29 |
30 | def _reporthook(count, block_size, total_size):
31 | """A simple reporthook for urllib.request.urlretrieve."""
32 | percent = int(count * block_size * 100 / total_size)
33 | sys.stdout.write(f"\r Downloading... {percent}%")
34 | sys.stdout.flush()
35 |
def download_file(name, url):
    """Download ``url`` into the file ``name``, unless ``name`` already exists.

    The data is first written to ``<name>.part`` and only renamed to ``name``
    once the transfer has finished. A failed or interrupted download therefore
    never leaves a truncated file under the final name, which a later run
    would skip as "already exists".

    Args:
        name: Destination path of the downloaded file.
        url: Source URL.

    Errors during the download are reported on stdout and are not re-raised.
    """
    if os.path.exists(name):
        print(f"-> {name} already exists. Skipping.")
        return
    print(f"-> Downloading {name}...")
    partial = f"{name}.part"
    try:
        urllib.request.urlretrieve(url, partial, _reporthook)
        os.replace(partial, name)
        sys.stdout.write("\n") # Move to the next line after download completes
        print(" Done.")
    except Exception as e:
        # Best-effort cleanup of the incomplete file; the download error
        # itself is what gets reported below.
        try:
            os.remove(partial)
        except OSError:
            pass
        print(f"\n Error downloading {name}: {e}")
        print(" Please check your internet connection and the URL.")
49 |
def download_model_suite(key):
    """Fetch every file listed in ``MODELS`` for the suite named ``key``.

    An unknown ``key`` is reported on stdout and nothing is downloaded.
    """
    if key not in MODELS:
        print(f"Error: Model suite '{key}' not found.")
        return

    print(f"\nFetching {key} model suite...")
    for entry in MODELS[key]:
        download_file(entry["name"], entry["url"])
58 |
def display_final_instructions():
    """Print the closing banner: where the files are and how to install them."""
    rule = "=" * 50
    lines = (
        "\n" + rule,
        " DOWNLOAD COMPLETE!",
        rule,
        "\nThe necessary files have been downloaded to the current directory:",
        f" {os.getcwd()}",
        "\nNext steps:",
        "1. Connect your PS Vita to another device via USB or FTP using VitaShell.",
        "2. Navigate to the `ux0:` partition on your Vita.",
        "3. Copy the downloaded .bin files into the `ux0:data/` folder.",
        "\nOnce the files are in place, you can run PSVita-LLM!",
        rule + "\n",
    )
    # One print for the whole block; the join supplies the line breaks that
    # separate the entries.
    print("\n".join(lines))
72 |
73 |
def main():
    """Run the interactive downloader: show the menu and act on the choice.

    Choices 1-3 download the selected suite(s) and then print the final
    instructions; choice 4 exits; any other input shows the menu again.
    """
    banner = "=" * 50
    print(banner)
    print("Model Downloader")
    print(banner)
    print("This script will download the required model and tokenizer")
    print("files for the PSVita-LLM application.\n")

    # Menu choice -> suites to download, in order.
    suites_by_choice = {
        "1": ("260K",),
        "2": ("15M",),
        "3": ("260K", "15M"),
    }
    menu = (
        "Please choose an option:",
        " [1] Download the 260K model (~1 MB)",
        " [2] Download the 15M model (~60 MB)",
        " [3] Download BOTH models",
        " [4] Exit",
    )

    while True:
        for line in menu:
            print(line)

        choice = input("\nEnter your choice (1-4): ")

        if choice == "4":
            print("Exiting.")
            return

        suites = suites_by_choice.get(choice)
        if suites is None:
            print("\nInvalid choice. Please enter a number between 1 and 4.\n")
            continue

        for suite in suites:
            download_model_suite(suite)
        display_final_instructions()
        return
109 |
110 | if __name__ == "__main__":
111 | main()
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | # CMAKE generated file: DO NOT EDIT!
2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.31
3 |
4 | # Default target executed when no arguments are given to make.
5 | default_target: all
6 | .PHONY : default_target
7 |
8 | # Allow only one "make -f Makefile2" at a time, but pass parallelism.
9 | .NOTPARALLEL:
10 |
11 | #=============================================================================
12 | # Special targets provided by cmake.
13 |
14 | # Disable implicit rules so canonical targets will work.
15 | .SUFFIXES:
16 |
17 | # Disable VCS-based implicit rules.
18 | % : %,v
19 |
20 | # Disable VCS-based implicit rules.
21 | % : RCS/%
22 |
23 | # Disable VCS-based implicit rules.
24 | % : RCS/%,v
25 |
26 | # Disable VCS-based implicit rules.
27 | % : SCCS/s.%
28 |
29 | # Disable VCS-based implicit rules.
30 | % : s.%
31 |
32 | .SUFFIXES: .hpux_make_needs_suffix_list
33 |
34 | # Command-line flag to silence nested $(MAKE).
35 | $(VERBOSE)MAKESILENT = -s
36 |
37 | #Suppress display of executed commands.
38 | $(VERBOSE).SILENT:
39 |
40 | # A target that is always out of date.
41 | cmake_force:
42 | .PHONY : cmake_force
43 |
44 | #=============================================================================
45 | # Set environment variables for the build.
46 |
47 | # The shell in which to execute make rules.
48 | SHELL = /bin/sh
49 |
50 | # The CMake executable.
51 | CMAKE_COMMAND = /opt/homebrew/bin/cmake
52 |
53 | # The command to remove a file.
54 | RM = /opt/homebrew/bin/cmake -E rm -f
55 |
56 | # Escaping for special characters.
57 | EQUALS = =
58 |
59 | # The top-level source directory on which CMake was run.
60 | CMAKE_SOURCE_DIR = /Users/alex/Documents/Code/psvita-llm
61 |
62 | # The top-level build directory on which CMake was run.
63 | CMAKE_BINARY_DIR = /Users/alex/Documents/Code/psvita-llm
64 |
65 | #=============================================================================
66 | # Targets provided globally by CMake.
67 |
68 | # Special rule for the target edit_cache
69 | edit_cache:
70 | @$(CMAKE_COMMAND) -E cmake_echo_color "--switch=$(COLOR)" --cyan "Running CMake cache editor..."
71 | /opt/homebrew/bin/ccmake -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR)
72 | .PHONY : edit_cache
73 |
74 | # Special rule for the target edit_cache
75 | edit_cache/fast: edit_cache
76 | .PHONY : edit_cache/fast
77 |
78 | # Special rule for the target rebuild_cache
79 | rebuild_cache:
80 | @$(CMAKE_COMMAND) -E cmake_echo_color "--switch=$(COLOR)" --cyan "Running CMake to regenerate build system..."
81 | /opt/homebrew/bin/cmake --regenerate-during-build -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR)
82 | .PHONY : rebuild_cache
83 |
84 | # Special rule for the target rebuild_cache
85 | rebuild_cache/fast: rebuild_cache
86 | .PHONY : rebuild_cache/fast
87 |
88 | # The main all target
89 | all: cmake_check_build_system
90 | $(CMAKE_COMMAND) -E cmake_progress_start /Users/alex/Documents/Code/psvita-llm/CMakeFiles /Users/alex/Documents/Code/psvita-llm//CMakeFiles/progress.marks
91 | $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 all
92 | $(CMAKE_COMMAND) -E cmake_progress_start /Users/alex/Documents/Code/psvita-llm/CMakeFiles 0
93 | .PHONY : all
94 |
95 | # The main clean target
96 | clean:
97 | $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 clean
98 | .PHONY : clean
99 |
100 | # The main clean target
101 | clean/fast: clean
102 | .PHONY : clean/fast
103 |
104 | # Prepare targets for installation.
105 | preinstall: all
106 | $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 preinstall
107 | .PHONY : preinstall
108 |
109 | # Prepare targets for installation.
110 | preinstall/fast:
111 | $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 preinstall
112 | .PHONY : preinstall/fast
113 |
114 | # clear depends
115 | depend:
116 | $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 1
117 | .PHONY : depend
118 |
119 | #=============================================================================
120 | # Target rules for targets named psvita-llm
121 |
122 | # Build rule for target.
123 | psvita-llm: cmake_check_build_system
124 | $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 psvita-llm
125 | .PHONY : psvita-llm
126 |
127 | # fast build rule for target.
128 | psvita-llm/fast:
129 | $(MAKE) $(MAKESILENT) -f CMakeFiles/psvita-llm.dir/build.make CMakeFiles/psvita-llm.dir/build
130 | .PHONY : psvita-llm/fast
131 |
132 | #=============================================================================
133 | # Target rules for targets named psvita-llm-velf
134 |
135 | # Build rule for target.
136 | psvita-llm-velf: cmake_check_build_system
137 | $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 psvita-llm-velf
138 | .PHONY : psvita-llm-velf
139 |
140 | # fast build rule for target.
141 | psvita-llm-velf/fast:
142 | $(MAKE) $(MAKESILENT) -f CMakeFiles/psvita-llm-velf.dir/build.make CMakeFiles/psvita-llm-velf.dir/build
143 | .PHONY : psvita-llm-velf/fast
144 |
145 | #=============================================================================
146 | # Target rules for targets named psvita-llm.self-self
147 |
148 | # Build rule for target.
149 | psvita-llm.self-self: cmake_check_build_system
150 | $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 psvita-llm.self-self
151 | .PHONY : psvita-llm.self-self
152 |
153 | # fast build rule for target.
154 | psvita-llm.self-self/fast:
155 | $(MAKE) $(MAKESILENT) -f CMakeFiles/psvita-llm.self-self.dir/build.make CMakeFiles/psvita-llm.self-self.dir/build
156 | .PHONY : psvita-llm.self-self/fast
157 |
158 | #=============================================================================
159 | # Target rules for targets named psvita-llm.vpk-vpk
160 |
161 | # Build rule for target.
162 | psvita-llm.vpk-vpk: cmake_check_build_system
163 | $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 psvita-llm.vpk-vpk
164 | .PHONY : psvita-llm.vpk-vpk
165 |
166 | # fast build rule for target.
167 | psvita-llm.vpk-vpk/fast:
168 | $(MAKE) $(MAKESILENT) -f CMakeFiles/psvita-llm.vpk-vpk.dir/build.make CMakeFiles/psvita-llm.vpk-vpk.dir/build
169 | .PHONY : psvita-llm.vpk-vpk/fast
170 |
171 | common/debugScreen.obj: common/debugScreen.c.obj
172 | .PHONY : common/debugScreen.obj
173 |
174 | # target to build an object file
175 | common/debugScreen.c.obj:
176 | $(MAKE) $(MAKESILENT) -f CMakeFiles/psvita-llm.dir/build.make CMakeFiles/psvita-llm.dir/common/debugScreen.c.obj
177 | .PHONY : common/debugScreen.c.obj
178 |
179 | common/debugScreen.i: common/debugScreen.c.i
180 | .PHONY : common/debugScreen.i
181 |
182 | # target to preprocess a source file
183 | common/debugScreen.c.i:
184 | $(MAKE) $(MAKESILENT) -f CMakeFiles/psvita-llm.dir/build.make CMakeFiles/psvita-llm.dir/common/debugScreen.c.i
185 | .PHONY : common/debugScreen.c.i
186 |
187 | common/debugScreen.s: common/debugScreen.c.s
188 | .PHONY : common/debugScreen.s
189 |
190 | # target to generate assembly for a file
191 | common/debugScreen.c.s:
192 | $(MAKE) $(MAKESILENT) -f CMakeFiles/psvita-llm.dir/build.make CMakeFiles/psvita-llm.dir/common/debugScreen.c.s
193 | .PHONY : common/debugScreen.c.s
194 |
195 | vita_llm.obj: vita_llm.c.obj
196 | .PHONY : vita_llm.obj
197 |
198 | # target to build an object file
199 | vita_llm.c.obj:
200 | $(MAKE) $(MAKESILENT) -f CMakeFiles/psvita-llm.dir/build.make CMakeFiles/psvita-llm.dir/vita_llm.c.obj
201 | .PHONY : vita_llm.c.obj
202 |
203 | vita_llm.i: vita_llm.c.i
204 | .PHONY : vita_llm.i
205 |
206 | # target to preprocess a source file
207 | vita_llm.c.i:
208 | $(MAKE) $(MAKESILENT) -f CMakeFiles/psvita-llm.dir/build.make CMakeFiles/psvita-llm.dir/vita_llm.c.i
209 | .PHONY : vita_llm.c.i
210 |
211 | vita_llm.s: vita_llm.c.s
212 | .PHONY : vita_llm.s
213 |
214 | # target to generate assembly for a file
215 | vita_llm.c.s:
216 | $(MAKE) $(MAKESILENT) -f CMakeFiles/psvita-llm.dir/build.make CMakeFiles/psvita-llm.dir/vita_llm.c.s
217 | .PHONY : vita_llm.c.s
218 |
219 | # Help Target
220 | help:
221 | @echo "The following are some of the valid targets for this Makefile:"
222 | @echo "... all (the default if no target is provided)"
223 | @echo "... clean"
224 | @echo "... depend"
225 | @echo "... edit_cache"
226 | @echo "... rebuild_cache"
227 | @echo "... psvita-llm-velf"
228 | @echo "... psvita-llm.self-self"
229 | @echo "... psvita-llm.vpk-vpk"
230 | @echo "... psvita-llm"
231 | @echo "... common/debugScreen.obj"
232 | @echo "... common/debugScreen.i"
233 | @echo "... common/debugScreen.s"
234 | @echo "... vita_llm.obj"
235 | @echo "... vita_llm.i"
236 | @echo "... vita_llm.s"
237 | .PHONY : help
238 |
239 |
240 |
241 | #=============================================================================
242 | # Special targets to cleanup operation of make.
243 |
244 | # Special rule to run CMake to check the build system integrity.
245 | # No rule that depends on this can have commands that come from listfiles
246 | # because they might be regenerated.
247 | cmake_check_build_system:
248 | $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0
249 | .PHONY : cmake_check_build_system
250 |
251 |
--------------------------------------------------------------------------------
/common/debugScreenFont.c:
--------------------------------------------------------------------------------
1 | /*
2 | * PSP Software Development Kit - http://www.pspdev.org
3 | * -----------------------------------------------------------------------
4 | * Licensed under the BSD license, see LICENSE in PSPSDK root for details.
5 | *
6 | * font.c - Debug Font.
7 | *
8 | * Copyright (c) 2005 Marcus R. Brown
9 | * Copyright (c) 2005 James Forshaw
10 | * Copyright (c) 2005 John Kelley
11 | *
12 | * $Id: font.c 540 2005-07-08 19:35:10Z warren $
13 | */
14 |
15 | PsvDebugScreenFont psvDebugScreenFont = { glyphs:(unsigned char*)
16 | "\x00\x00\x00\x00\x00\x00\x00\x00\x3c\x42\xa5\x81\xa5\x99\x42\x3c"
17 | "\x3c\x7e\xdb\xff\xff\xdb\x66\x3c\x6c\xfe\xfe\xfe\x7c\x38\x10\x00"
18 | "\x10\x38\x7c\xfe\x7c\x38\x10\x00\x10\x38\x54\xfe\x54\x10\x38\x00"
19 | "\x10\x38\x7c\xfe\xfe\x10\x38\x00\x00\x00\x00\x30\x30\x00\x00\x00"
20 | "\xff\xff\xff\xe7\xe7\xff\xff\xff\x38\x44\x82\x82\x82\x44\x38\x00"
21 | "\xc7\xbb\x7d\x7d\x7d\xbb\xc7\xff\x0f\x03\x05\x79\x88\x88\x88\x70"
22 | "\x38\x44\x44\x44\x38\x10\x7c\x10\x30\x28\x24\x24\x28\x20\xe0\xc0"
23 | "\x3c\x24\x3c\x24\x24\xe4\xdc\x18\x10\x54\x38\xee\x38\x54\x10\x00"
24 | "\x10\x10\x10\x7c\x10\x10\x10\x10\x10\x10\x10\xff\x00\x00\x00\x00"
25 | "\x00\x00\x00\xff\x10\x10\x10\x10\x10\x10\x10\xf0\x10\x10\x10\x10"
26 | "\x10\x10\x10\x1f\x10\x10\x10\x10\x10\x10\x10\xff\x10\x10\x10\x10"
27 | "\x10\x10\x10\x10\x10\x10\x10\x10\x00\x00\x00\xff\x00\x00\x00\x00"
28 | "\x00\x00\x00\x1f\x10\x10\x10\x10\x00\x00\x00\xf0\x10\x10\x10\x10"
29 | "\x10\x10\x10\x1f\x00\x00\x00\x00\x10\x10\x10\xf0\x00\x00\x00\x00"
30 | "\x81\x42\x24\x18\x18\x24\x42\x81\x01\x02\x04\x08\x10\x20\x40\x80"
31 | "\x80\x40\x20\x10\x08\x04\x02\x01\x00\x10\x10\xff\x10\x10\x00\x00"
32 | "\x00\x00\x00\x00\x00\x00\x00\x00\x20\x20\x20\x20\x00\x00\x20\x00"
33 | "\x50\x50\x50\x00\x00\x00\x00\x00\x50\x50\xf8\x50\xf8\x50\x50\x00"
34 | "\x20\x78\xa0\x70\x28\xf0\x20\x00\xc0\xc8\x10\x20\x40\x98\x18\x00"
35 | "\x40\xa0\x40\xa8\x90\x98\x60\x00\x10\x20\x40\x00\x00\x00\x00\x00"
36 | "\x10\x20\x40\x40\x40\x20\x10\x00\x40\x20\x10\x10\x10\x20\x40\x00"
37 | "\x20\xa8\x70\x20\x70\xa8\x20\x00\x00\x20\x20\xf8\x20\x20\x00\x00"
38 | "\x00\x00\x00\x00\x00\x20\x20\x40\x00\x00\x00\x78\x00\x00\x00\x00"
39 | "\x00\x00\x00\x00\x00\x60\x60\x00\x00\x00\x08\x10\x20\x40\x80\x00"
40 | "\x70\x88\x98\xa8\xc8\x88\x70\x00\x20\x60\xa0\x20\x20\x20\xf8\x00"
41 | "\x70\x88\x08\x10\x60\x80\xf8\x00\x70\x88\x08\x30\x08\x88\x70\x00"
42 | "\x10\x30\x50\x90\xf8\x10\x10\x00\xf8\x80\xe0\x10\x08\x10\xe0\x00"
43 | "\x30\x40\x80\xf0\x88\x88\x70\x00\xf8\x88\x10\x20\x20\x20\x20\x00"
44 | "\x70\x88\x88\x70\x88\x88\x70\x00\x70\x88\x88\x78\x08\x10\x60\x00"
45 | "\x00\x00\x20\x00\x00\x20\x00\x00\x00\x00\x20\x00\x00\x20\x20\x40"
46 | "\x18\x30\x60\xc0\x60\x30\x18\x00\x00\x00\xf8\x00\xf8\x00\x00\x00"
47 | "\xc0\x60\x30\x18\x30\x60\xc0\x00\x70\x88\x08\x10\x20\x00\x20\x00"
48 | "\x70\x88\x08\x68\xa8\xa8\x70\x00\x20\x50\x88\x88\xf8\x88\x88\x00"
49 | "\xf0\x48\x48\x70\x48\x48\xf0\x00\x30\x48\x80\x80\x80\x48\x30\x00"
50 | "\xe0\x50\x48\x48\x48\x50\xe0\x00\xf8\x80\x80\xf0\x80\x80\xf8\x00"
51 | "\xf8\x80\x80\xf0\x80\x80\x80\x00\x70\x88\x80\xb8\x88\x88\x70\x00"
52 | "\x88\x88\x88\xf8\x88\x88\x88\x00\x70\x20\x20\x20\x20\x20\x70\x00"
53 | "\x38\x10\x10\x10\x90\x90\x60\x00\x88\x90\xa0\xc0\xa0\x90\x88\x00"
54 | "\x80\x80\x80\x80\x80\x80\xf8\x00\x88\xd8\xa8\xa8\x88\x88\x88\x00"
55 | "\x88\xc8\xc8\xa8\x98\x98\x88\x00\x70\x88\x88\x88\x88\x88\x70\x00"
56 | "\xf0\x88\x88\xf0\x80\x80\x80\x00\x70\x88\x88\x88\xa8\x90\x68\x00"
57 | "\xf0\x88\x88\xf0\xa0\x90\x88\x00\x70\x88\x80\x70\x08\x88\x70\x00"
58 | "\xf8\x20\x20\x20\x20\x20\x20\x00\x88\x88\x88\x88\x88\x88\x70\x00"
59 | "\x88\x88\x88\x88\x50\x50\x20\x00\x88\x88\x88\xa8\xa8\xd8\x88\x00"
60 | "\x88\x88\x50\x20\x50\x88\x88\x00\x88\x88\x88\x70\x20\x20\x20\x00"
61 | "\xf8\x08\x10\x20\x40\x80\xf8\x00\x70\x40\x40\x40\x40\x40\x70\x00"
62 | "\x00\x00\x80\x40\x20\x10\x08\x00\x70\x10\x10\x10\x10\x10\x70\x00"
63 | "\x20\x50\x88\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf8\x00"
64 | "\x40\x20\x10\x00\x00\x00\x00\x00\x00\x00\x70\x08\x78\x88\x78\x00"
65 | "\x80\x80\xb0\xc8\x88\xc8\xb0\x00\x00\x00\x70\x88\x80\x88\x70\x00"
66 | "\x08\x08\x68\x98\x88\x98\x68\x00\x00\x00\x70\x88\xf8\x80\x70\x00"
67 | "\x10\x28\x20\xf8\x20\x20\x20\x00\x00\x00\x68\x98\x98\x68\x08\x70"
68 | "\x80\x80\xf0\x88\x88\x88\x88\x00\x20\x00\x60\x20\x20\x20\x70\x00"
69 | "\x10\x00\x30\x10\x10\x10\x90\x60\x40\x40\x48\x50\x60\x50\x48\x00"
70 | "\x60\x20\x20\x20\x20\x20\x70\x00\x00\x00\xd0\xa8\xa8\xa8\xa8\x00"
71 | "\x00\x00\xb0\xc8\x88\x88\x88\x00\x00\x00\x70\x88\x88\x88\x70\x00"
72 | "\x00\x00\xb0\xc8\xc8\xb0\x80\x80\x00\x00\x68\x98\x98\x68\x08\x08"
73 | "\x00\x00\xb0\xc8\x80\x80\x80\x00\x00\x00\x78\x80\xf0\x08\xf0\x00"
74 | "\x40\x40\xf0\x40\x40\x48\x30\x00\x00\x00\x90\x90\x90\x90\x68\x00"
75 | "\x00\x00\x88\x88\x88\x50\x20\x00\x00\x00\x88\xa8\xa8\xa8\x50\x00"
76 | "\x00\x00\x88\x50\x20\x50\x88\x00\x00\x00\x88\x88\x98\x68\x08\x70"
77 | "\x00\x00\xf8\x10\x20\x40\xf8\x00\x18\x20\x20\x40\x20\x20\x18\x00"
78 | "\x20\x20\x20\x00\x20\x20\x20\x00\xc0\x20\x20\x10\x20\x20\xc0\x00"
79 | "\x40\xa8\x10\x00\x00\x00\x00\x00\x00\x00\x20\x50\xf8\x00\x00\x00"
80 | "\x70\x88\x80\x80\x88\x70\x20\x60\x90\x00\x00\x90\x90\x90\x68\x00"
81 | "\x10\x20\x70\x88\xf8\x80\x70\x00\x20\x50\x70\x08\x78\x88\x78\x00"
82 | "\x48\x00\x70\x08\x78\x88\x78\x00\x20\x10\x70\x08\x78\x88\x78\x00"
83 | "\x20\x00\x70\x08\x78\x88\x78\x00\x00\x70\x80\x80\x80\x70\x10\x60"
84 | "\x20\x50\x70\x88\xf8\x80\x70\x00\x50\x00\x70\x88\xf8\x80\x70\x00"
85 | "\x20\x10\x70\x88\xf8\x80\x70\x00\x50\x00\x00\x60\x20\x20\x70\x00"
86 | "\x20\x50\x00\x60\x20\x20\x70\x00\x40\x20\x00\x60\x20\x20\x70\x00"
87 | "\x50\x00\x20\x50\x88\xf8\x88\x00\x20\x00\x20\x50\x88\xf8\x88\x00"
88 | "\x10\x20\xf8\x80\xf0\x80\xf8\x00\x00\x00\x6c\x12\x7e\x90\x6e\x00"
89 | "\x3e\x50\x90\x9c\xf0\x90\x9e\x00\x60\x90\x00\x60\x90\x90\x60\x00"
90 | "\x90\x00\x00\x60\x90\x90\x60\x00\x40\x20\x00\x60\x90\x90\x60\x00"
91 | "\x40\xa0\x00\xa0\xa0\xa0\x50\x00\x40\x20\x00\xa0\xa0\xa0\x50\x00"
92 | "\x90\x00\x90\x90\xb0\x50\x10\xe0\x50\x00\x70\x88\x88\x88\x70\x00"
93 | "\x50\x00\x88\x88\x88\x88\x70\x00\x20\x20\x78\x80\x80\x78\x20\x20"
94 | "\x18\x24\x20\xf8\x20\xe2\x5c\x00\x88\x50\x20\xf8\x20\xf8\x20\x00"
95 | "\xc0\xa0\xa0\xc8\x9c\x88\x88\x8c\x18\x20\x20\xf8\x20\x20\x20\x40"
96 | "\x10\x20\x70\x08\x78\x88\x78\x00\x10\x20\x00\x60\x20\x20\x70\x00"
97 | "\x20\x40\x00\x60\x90\x90\x60\x00\x20\x40\x00\x90\x90\x90\x68\x00"
98 | "\x50\xa0\x00\xa0\xd0\x90\x90\x00\x28\x50\x00\xc8\xa8\x98\x88\x00"
99 | "\x00\x70\x08\x78\x88\x78\x00\xf8\x00\x60\x90\x90\x90\x60\x00\xf0"
100 | "\x20\x00\x20\x40\x80\x88\x70\x00\x00\x00\x00\xf8\x80\x80\x00\x00"
101 | "\x00\x00\x00\xf8\x08\x08\x00\x00\x84\x88\x90\xa8\x54\x84\x08\x1c"
102 | "\x84\x88\x90\xa8\x58\xa8\x3c\x08\x20\x00\x00\x20\x20\x20\x20\x00"
103 | "\x00\x00\x24\x48\x90\x48\x24\x00\x00\x00\x90\x48\x24\x48\x90\x00"
104 | "\x28\x50\x20\x50\x88\xf8\x88\x00\x28\x50\x70\x08\x78\x88\x78\x00"
105 | "\x28\x50\x00\x70\x20\x20\x70\x00\x28\x50\x00\x20\x20\x20\x70\x00"
106 | "\x28\x50\x00\x70\x88\x88\x70\x00\x50\xa0\x00\x60\x90\x90\x60\x00"
107 | "\x28\x50\x00\x88\x88\x88\x70\x00\x50\xa0\x00\xa0\xa0\xa0\x50\x00"
108 | "\xfc\x48\x48\x48\xe8\x08\x50\x20\x00\x50\x00\x50\x50\x50\x10\x20"
109 | "\xc0\x44\xc8\x54\xec\x54\x9e\x04\x10\xa8\x40\x00\x00\x00\x00\x00"
110 | "\x00\x20\x50\x88\x50\x20\x00\x00\x88\x10\x20\x40\x80\x28\x00\x00"
111 | "\x7c\xa8\xa8\x68\x28\x28\x28\x00\x38\x40\x30\x48\x48\x30\x08\x70"
112 | "\x00\x00\x00\x00\x00\x00\xff\xff\xf0\xf0\xf0\xf0\x0f\x0f\x0f\x0f"
113 | "\x00\x00\xff\xff\xff\xff\xff\xff\xff\xff\x00\x00\x00\x00\x00\x00"
114 | "\x00\x00\x00\x3c\x3c\x00\x00\x00\xff\xff\xff\xff\xff\xff\x00\x00"
115 | "\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\x0f\x0f\x0f\x0f\xf0\xf0\xf0\xf0"
116 | "\xfc\xfc\xfc\xfc\xfc\xfc\xfc\xfc\x03\x03\x03\x03\x03\x03\x03\x03"
117 | "\x3f\x3f\x3f\x3f\x3f\x3f\x3f\x3f\x11\x22\x44\x88\x11\x22\x44\x88"
118 | "\x88\x44\x22\x11\x88\x44\x22\x11\xfe\x7c\x38\x10\x00\x00\x00\x00"
119 | "\x00\x00\x00\x00\x10\x38\x7c\xfe\x80\xc0\xe0\xf0\xe0\xc0\x80\x00"
120 | "\x01\x03\x07\x0f\x07\x03\x01\x00\xff\x7e\x3c\x18\x18\x3c\x7e\xff"
121 | "\x81\xc3\xe7\xff\xff\xe7\xc3\x81\xf0\xf0\xf0\xf0\x00\x00\x00\x00"
122 | "\x00\x00\x00\x00\x0f\x0f\x0f\x0f\x0f\x0f\x0f\x0f\x00\x00\x00\x00"
123 | "\x00\x00\x00\x00\xf0\xf0\xf0\xf0\x33\x33\xcc\xcc\x33\x33\xcc\xcc"
124 | "\x00\x20\x20\x50\x50\x88\xf8\x00\x20\x20\x70\x20\x70\x20\x20\x00"
125 | "\x00\x00\x00\x50\x88\xa8\x50\x00\xff\xff\xff\xff\xff\xff\xff\xff"
126 | "\x00\x00\x00\x00\xff\xff\xff\xff\xf0\xf0\xf0\xf0\xf0\xf0\xf0\xf0"
127 | "\x0f\x0f\x0f\x0f\x0f\x0f\x0f\x0f\xff\xff\xff\xff\x00\x00\x00\x00"
128 | "\x00\x00\x68\x90\x90\x90\x68\x00\x30\x48\x48\x70\x48\x48\x70\xc0"
129 | "\xf8\x88\x80\x80\x80\x80\x80\x00\xf8\x50\x50\x50\x50\x50\x98\x00"
130 | "\xf8\x88\x40\x20\x40\x88\xf8\x00\x00\x00\x78\x90\x90\x90\x60\x00"
131 | "\x00\x50\x50\x50\x50\x68\x80\x80\x00\x50\xa0\x20\x20\x20\x20\x00"
132 | "\xf8\x20\x70\xa8\xa8\x70\x20\xf8\x20\x50\x88\xf8\x88\x50\x20\x00"
133 | "\x70\x88\x88\x88\x50\x50\xd8\x00\x30\x40\x40\x20\x50\x50\x50\x20"
134 | "\x00\x00\x00\x50\xa8\xa8\x50\x00\x08\x70\xa8\xa8\xa8\x70\x80\x00"
135 | "\x38\x40\x80\xf8\x80\x40\x38\x00\x70\x88\x88\x88\x88\x88\x88\x00"
136 | "\x00\xf8\x00\xf8\x00\xf8\x00\x00\x20\x20\xf8\x20\x20\x00\xf8\x00"
137 | "\xc0\x30\x08\x30\xc0\x00\xf8\x00\x18\x60\x80\x60\x18\x00\xf8\x00"
138 | "\x10\x28\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\xa0\x40"
139 | "\x00\x20\x00\xf8\x00\x20\x00\x00\x00\x50\xa0\x00\x50\xa0\x00\x00"
140 | "\x00\x18\x24\x24\x18\x00\x00\x00\x00\x30\x78\x78\x30\x00\x00\x00"
141 | "\x00\x00\x00\x00\x30\x00\x00\x00\x3e\x20\x20\x20\xa0\x60\x20\x00"
142 | "\xa0\x50\x50\x50\x00\x00\x00\x00\x40\xa0\x20\x40\xe0\x00\x00\x00"
143 | "\x00\x38\x38\x38\x38\x38\x38\x00\x00\x00\x00\x00\x00\x00\x00\x00",
144 | width :8, height:8, first:0, last:255, size_w:8, size_h:8};
--------------------------------------------------------------------------------
/common/debugScreen.c:
--------------------------------------------------------------------------------
1 | #ifndef DEBUG_SCREEN_C
2 | #define DEBUG_SCREEN_C
3 |
4 | /*
5 | * debugScreen.c of Vita SDK
6 | *
7 | * - psvDebugScreenInit()
8 | * Initializes debug screen for output.
9 | *
10 | * - psvDebugScreenPuts()
11 | * Similar to the C library function puts() writes a string to the debug
12 | * screen up to but not including the NUL character.
13 | * Supports the most important CSI sequences of ECMA-48 / ISO/IEC 6429:1992.
14 | * Graphic Rendition Combination Mode (GRCM) supported is Cumulative.
15 | * Modifications:
16 | * - CSI SGR codes 30-37/38/39 & 40-47/48/49 set standard/fitting/default intensity, so instead of "\e[1;31m" use "\e[31;1m"
17 | * - ANSI color #8 is made darker (40<>80), so that "dark" white is still lighter than "bright" dark
18 | * - support 16 save storages for CSI s and CSI u, e.g "\e[8s" and "\e[8u"
19 | * [1] https://en.wikipedia.org/wiki/ANSI_escape_code#CSI_sequences
20 | * [2] https://jonasjacek.github.io/colors/
21 | * [3] https://www.ecma-international.org/publications/standards/Ecma-048.htm
22 | * [4] https://invisible-island.net/xterm/ctlseqs/ctlseqs.html
23 | * [5] http://man7.org/linux/man-pages/man4/console_codes.4.html
24 | *
25 | * (CSI = "\e[")
26 | * CSI [n] s = Save Cursor Position to slot #n (0-15). Default 0.
27 | * CSI [n] u = Restore Cursor Position from slot #n (0-15). Default 0.
28 | * CSI n A = Cursor Up <n> times.
29 | * CSI n B = Cursor Down <n> times.
30 | * CSI n C = Cursor Forward <n> times.
31 | * CSI n D = Cursor Back <n> times.
32 | * CSI n E = Cursor Next Line <n> times and to Beginning of that Line.
33 | * CSI n F = Cursor Previous Line <n> times and to Beginning of that Line.
34 | * CSI n G = Cursor to Column <n>. The value is 1-based and defaults to 1 (first column) if omitted.
35 | * CSI n ; m H = Cursor to Row <n> and Column <m>. The values are 1-based and default to 1 (top left corner) if omitted.
36 | * CSI n ; m f = Cursor to Row <n> and Column <m>. The values are 1-based and default to 1 (top left corner) if omitted.
37 | * CSI [n] J = Clears part of the screen. Cursor position does not change.
38 | * 0 (default) from cursor to end of screen.
39 | * 1 from cursor to beginning of the screen.
40 | * 2 entire screen
41 | * CSI [n] K = Clears part of the line. Cursor position does not change.
42 | * 0 (default) from cursor to end of line.
43 | * 1 from cursor to beginning of line.
44 | * 2 clear entire line.
45 | * CSI [n] m = Sets the appearance of the following characters.
46 | * 0 Reset all (colors and inversion) (default)
47 | * 1 Increased intensity ("bright" color)
48 | * 2 Decreased intensity ("faint"/"dark" color)
49 | * 7 Enable inversion
50 | * 22 Standard intensity ("normal" color)
51 | * 27 Disable inversion
52 | * 30–37 Set ANSI foreground color with standard intensity
53 | * 38 Set foreground color. Arguments are 5;<n> or 2;<r>;<g>;<b>
54 | * 39 Default foreground color
55 | * 40–47 Set standard ANSI background color with standard intensity
56 | * 48 Set background color. Arguments are 5;<n> or 2;<r>;<g>;<b>
57 | * 49 Default background color
58 | * 90–97 Set ANSI foreground color with increased intensity
59 | * 100–107 Set ANSI background color with increased intensity
60 | *
61 | * - psvDebugScreenPrintf()
62 | * Similar to the C library function printf() formats a string and outputs
63 | * it via psvDebugScreenPuts() to the debug screen.
64 | *
65 | * - psvDebugScreenGetColorStateCopy(ColorState *copy)
66 | * Get copy of current color state.
67 | *
68 | * - psvDebugScreenGetCoordsXY(int *x, int *y)
69 | * Get copy of current pixel coordinates.
70 | * Allows for multiple and custom position stores.
71 | * Allows correct positioning when using different font sizes.
72 | *
73 | * - psvDebugScreenSetCoordsXY(int *x, int *y)
74 | * Set pixel coordinates.
75 | * Allows for multiple and custom position stores.
76 | * Allows correct positioning when using different font sizes.
77 | *
78 | * - PsvDebugScreenFont *psvDebugScreenGetFont()
79 | * Get current font.
80 | *
81 | * - PsvDebugScreenFont *psvDebugScreenSetFont(PsvDebugScreenFont *font)
82 | * Set font. Returns current font.
83 | *
84 | * - PsvDebugScreenFont *psvDebugScreenScaleFont2x(PsvDebugScreenFont *source_font)
85 | * Scales a font by 2 (e.g. 8x8 to 16x16) and returns new scaled font.
86 | *
87 | * Also see the following samples:
88 | * - debugscreen
89 | * - debug_print
90 | *
91 | */
92 |
93 | #include // for malloc(), free()
94 | #include // for vsnprintf()
95 | #include // for memset(), memcpy()
96 | #include // for va_list, va_start(), va_end()
97 | #include
98 |
99 | #include "debugScreen.h"
100 |
101 | #include "debugScreenFont.c"
102 |
// Frame buffer geometry. SCREEN_FB_WIDTH is the row stride (in pixels) used by
// every pixel-address computation in this file.
#define SCREEN_FB_WIDTH (960) // frame buffer aligned width for accessing vram
#define SCREEN_FB_SIZE (2 * 1024 * 1024) // Must be 256KB aligned
#ifndef SCREEN_TAB_SIZE // this allows easy overriding
#define SCREEN_TAB_SIZE (8)
#endif
// Tab stop distance in pixels for the current font. Refers to F, which is
// defined on the next line; that works because macros expand at the point of use.
#define SCREEN_TAB_W ((F)->size_w * (SCREEN_TAB_SIZE))
// Shorthand for the currently selected font pointer.
#define F psvDebugScreenFontCurrent

// Pack r, g, b into one value with r in bits 0-7, g in bits 8-15 and b in
// bits 16-23. The arguments are not masked.
#define FROM_FULL_RGB(r,g,b ) ( ((b)<<16) | ((g)<<8) | (r) )
// Swap bits 0-7 with bits 16-23 of rgb and keep bits 8-15. The macro assigns the
// result back to its argument, so the argument must be a modifiable lvalue and
// is evaluated several times. Bits 24-31 are cleared by the masking.
#define CONVERT_RGB_BGR(rgb) rgb = ( (((rgb)&0x0000FF)<<16) | ((rgb)&0x00FF00) | (((rgb)&0xFF0000)>>16) )

// Fill the rectangle rows [H,toH) x columns [W,toW) of the frame buffer `base`
// with color. Expands to a bare nested for statement that already ends in ';'
// and is not wrapped in braces.
#define CLEARSCRNBLOCK(H,toH,W,toW,color) for (int h = (H); h < (toH); h++) for (int w = (W); w < (toW); w++) ((uint32_t*)base)[h*(SCREEN_FB_WIDTH) + w] = (color);
// Fill the complete rows [H,toH) of the frame buffer `base` with color.
// Expands to a braced block that declares the locals `pixel` and `i`, so the
// arguments must not use those names. Writes nothing when toH <= H.
#define CLEARSCRNLINES(H,toH,color) { uint32_t *pixel = (uint32_t *)base + ((H) * (SCREEN_FB_WIDTH)); int i = (((toH) - (H)) * (SCREEN_FB_WIDTH)); for (; i > 0; i--) *pixel++ = (color); }

// Number of cursor save slots addressable by CSI s / CSI u.
#define SAVE_STORAGES 16
118 |
// Set to 1 by psvDebugScreenInit() and back to 0 by psvDebugScreenFinish().
static int initialized = 0;
// mutex: handle locked around drawing in psvDebugScreenPuts().
// coordX/coordY: current cursor position in pixels.
static int mutex, coordX, coordY;
// Cursor positions stored by CSI s and restored by CSI u, one pair per slot.
static int savedX[SAVE_STORAGES] = { 0 }, savedY[SAVE_STORAGES] = { 0 };
// Current color state. The initializer is positional, so it depends on the
// member order of ColorState (declared in debugScreen.h, not visible here).
static ColorState colors = {
    0, 0, // truecolor flags
    0, 0, // truecolors
    0, 0, 0, 0, 0, // ANSI/VTERM/GREYSCALE colors
    7, 22, 0, 22, 0, // default colors (ANSI/VTERM/GREYSCALE)
    0, 0 // current colors
};

// Font used for drawing; starts as the built-in font and is replaced via
// psvDebugScreenSetFont().
static PsvDebugScreenFont *psvDebugScreenFontCurrent = &psvDebugScreenFont;
131 |
// Platform glue: on the Vita the frame buffer is a kernel memory block; on any
// other target it is a plain static array and the mutex calls are stubbed out.
#ifdef __vita__
// NOTE(review): the header names of the next three includes are missing in this
// copy of the file; presumably the SDK display/kernel headers that declare
// SceUID, the sceKernel* calls and sceDisplaySetFrameBuf — restore and confirm.
#include
#include
#include
static SceUID displayblock; // memory block backing the frame buffer
static void* base; // pointer to frame buffer
#else
// Selects the stub branch in psvDebugScreenInit() / psvDebugScreenFinish().
#define NO_psvDebugScreenInit
// Hook invoked by psvDebugScreenInit() on this path; expands to nothing unless
// it was defined before this point.
#ifndef psvDebugScreenInitReplacement
#define psvDebugScreenInitReplacement(...)
#endif
// Lock/unlock degrade to a plain assignment to the mutex variable.
#define sceKernelLockMutex(m,v,x) m=v
#define sceKernelUnlockMutex(m,v) m=v
// Four bytes per pixel, matching the uint32_t pixel accesses in this file.
static char base[(SCREEN_FB_WIDTH) * (SCREEN_HEIGHT) * 4];
#endif
147 |
/*
 * Decreased-intensity ("dark", SGR 2) variants of ANSI colors 0-7.
 * Values use the same 0xBBGGRR layout as ANSI_COLORS_BGR.
 * The table is only ever read, so it is const.
 */
static const uint32_t DARK_COLORS_BGR[8] = {
    0x000000, 0x000040, 0x004000, 0x004040, 0x400000, 0x400040, 0x404000, 0x808080, // 0-7
};
151 |
// ANSI/VTERM/GREYSCALE palette: https://en.wikipedia.org/wiki/ANSI_escape_code#8-bit
// Layout: 0-7 standard colors, 8-15 bright colors, 16-231 color cube, 232-255 greyscale ramp.
// Values are stored as 0xBBGGRR.
// modifications:
// - #8 is made darker (40<>80), so that "dark" white is still lighter than "bright" dark
// The table is only ever read, so it is const.
static const uint32_t ANSI_COLORS_BGR[256] = {
    0x000000, 0x000080, 0x008000, 0x008080, 0x800000, 0x800080, 0x808000, 0xc0c0c0, // 0-7
    0x404040, 0x0000ff, 0x00ff00, 0x00ffff, 0xff0000, 0xff00ff, 0xffff00, 0xffffff, // 8-15
    0x000000, 0x5f0000, 0x870000, 0xaf0000, 0xd70000, 0xff0000, 0x005f00, 0x5f5f00, // 16-23
    0x875f00, 0xaf5f00, 0xd75f00, 0xff5f00, 0x008700, 0x5f8700, 0x878700, 0xaf8700, // 24-31
    0xd78700, 0xff8700, 0x00af00, 0x5faf00, 0x87af00, 0xafaf00, 0xd7af00, 0xffaf00, // 32-39
    0x00d700, 0x5fd700, 0x87d700, 0xafd700, 0xd7d700, 0xffd700, 0x00ff00, 0x5fff00, // 40-47
    0x87ff00, 0xafff00, 0xd7ff00, 0xffff00, 0x00005f, 0x5f005f, 0x87005f, 0xaf005f, // 48-55
    0xd7005f, 0xff005f, 0x005f5f, 0x5f5f5f, 0x875f5f, 0xaf5f5f, 0xd75f5f, 0xff5f5f, // 56-63
    0x00875f, 0x5f875f, 0x87875f, 0xaf875f, 0xd7875f, 0xff875f, 0x00af5f, 0x5faf5f, // 64-71
    0x87af5f, 0xafaf5f, 0xd7af5f, 0xffaf5f, 0x00d75f, 0x5fd75f, 0x87d75f, 0xafd75f, // 72-79
    0xd7d75f, 0xffd75f, 0x00ff5f, 0x5fff5f, 0x87ff5f, 0xafff5f, 0xd7ff5f, 0xffff5f, // 80-87
    0x000087, 0x5f0087, 0x870087, 0xaf0087, 0xd70087, 0xff0087, 0x005f87, 0x5f5f87, // 88-95
    0x875f87, 0xaf5f87, 0xd75f87, 0xff5f87, 0x008787, 0x5f8787, 0x878787, 0xaf8787, // 96-103
    0xd78787, 0xff8787, 0x00af87, 0x5faf87, 0x87af87, 0xafaf87, 0xd7af87, 0xffaf87, // 104-111
    0x00d787, 0x5fd787, 0x87d787, 0xafd787, 0xd7d787, 0xffd787, 0x00ff87, 0x5fff87, // 112-119
    0x87ff87, 0xafff87, 0xd7ff87, 0xffff87, 0x0000af, 0x5f00af, 0x8700af, 0xaf00af, // 120-127
    0xd700af, 0xff00af, 0x005faf, 0x5f5faf, 0x875faf, 0xaf5faf, 0xd75faf, 0xff5faf, // 128-135
    0x0087af, 0x5f87af, 0x8787af, 0xaf87af, 0xd787af, 0xff87af, 0x00afaf, 0x5fafaf, // 136-143
    0x87afaf, 0xafafaf, 0xd7afaf, 0xffafaf, 0x00d7af, 0x5fd7af, 0x87d7af, 0xafd7af, // 144-151
    0xd7d7af, 0xffd7af, 0x00ffaf, 0x5fffaf, 0x87ffaf, 0xafffaf, 0xd7ffaf, 0xffffaf, // 152-159
    0x0000d7, 0x5f00d7, 0x8700d7, 0xaf00d7, 0xd700d7, 0xff00d7, 0x005fd7, 0x5f5fd7, // 160-167
    0x875fd7, 0xaf5fd7, 0xd75fd7, 0xff5fd7, 0x0087d7, 0x5f87d7, 0x8787d7, 0xaf87d7, // 168-175
    0xd787d7, 0xff87d7, 0x00afd7, 0x5fafd7, 0x87afd7, 0xafafd7, 0xd7afd7, 0xffafd7, // 176-183
    0x00d7d7, 0x5fd7d7, 0x87d7d7, 0xafd7d7, 0xd7d7d7, 0xffd7d7, 0x00ffd7, 0x5fffd7, // 184-191
    0x87ffd7, 0xafffd7, 0xd7ffd7, 0xffffd7, 0x0000ff, 0x5f00ff, 0x8700ff, 0xaf00ff, // 192-199
    0xd700ff, 0xff00ff, 0x005fff, 0x5f5fff, 0x875fff, 0xaf5fff, 0xd75fff, 0xff5fff, // 200-207
    0x0087ff, 0x5f87ff, 0x8787ff, 0xaf87ff, 0xd787ff, 0xff87ff, 0x00afff, 0x5fafff, // 208-215
    0x87afff, 0xafafff, 0xd7afff, 0xffafff, 0x00d7ff, 0x5fd7ff, 0x87d7ff, 0xafd7ff, // 216-223
    0xd7d7ff, 0xffd7ff, 0x00ffff, 0x5fffff, 0x87ffff, 0xafffff, 0xd7ffff, 0xffffff, // 224-231
    0x080808, 0x121212, 0x1c1c1c, 0x262626, 0x303030, 0x3a3a3a, 0x444444, 0x4e4e4e, // 232-239
    0x585858, 0x626262, 0x6c6c6c, 0x767676, 0x808080, 0x8a8a8a, 0x949494, 0x9e9e9e, // 240-247
    0xa8a8a8, 0xb2b2b2, 0xbcbcbc, 0xc6c6c6, 0xd0d0d0, 0xdadada, 0xe4e4e4, 0xeeeeee, // 248-255
};
189 |
190 | /*
191 | * Reset foreground color to default
192 | */
193 | static void psvDebugScreenResetFgColor(void) {
194 | colors.fgTrueColorFlag = 0;
195 | colors.fgTrueColor = 0;
196 | colors.fgIndex = colors.fgIndexDefault;
197 | colors.fgIntensity = colors.fgIntensityDefault;
198 | }
199 |
200 | /*
201 | * Reset background color to default
202 | */
203 | static void psvDebugScreenResetBgColor(void) {
204 | colors.bgTrueColorFlag = 0;
205 | colors.bgTrueColor = 0;
206 | colors.bgIndex = colors.bgIndexDefault;
207 | colors.bgIntensity = colors.bgIntensityDefault;
208 | }
209 |
210 | /*
211 | * Reset inversion state to default
212 | */
213 | static void psvDebugScreenResetInversion(void) {
214 | colors.inversion = colors.inversionDefault;
215 | }
216 |
217 | /*
218 | * Determine colors according to current color state
219 | */
220 | static void psvDebugScreenSetColors(void) {
221 | uint32_t *color_fg, *color_bg;
222 |
223 | // special case: inversion
224 | if (!colors.inversion) {
225 | color_fg = &colors.color_fg;
226 | color_bg = &colors.color_bg;
227 | } else {
228 | color_fg = &colors.color_bg;
229 | color_bg = &colors.color_fg;
230 | }
231 |
232 | // foregound color
233 | if ((colors.fgIndex<=7) && (colors.fgIntensity==1)) { // ANSI palette with increased intensity
234 | colors.fgIndex |= 0x8;
235 | } else if ((colors.fgIndex<=15) && (colors.fgIntensity!=1)) { // ANSI palette with standard/decreased intensity
236 | colors.fgIndex &= 0x7;
237 | }
238 | if (colors.fgTrueColorFlag) {
239 | *color_fg = colors.fgTrueColor;
240 | } else {
241 | if ((colors.fgIndex<=7) && (colors.fgIntensity==2)) { // "ANSI" palette with decreased intensity
242 | *color_fg = DARK_COLORS_BGR[colors.fgIndex];
243 | } else { // ANSI/VTERM/GREYSCALE palette
244 | *color_fg = ANSI_COLORS_BGR[colors.fgIndex];
245 | }
246 | }
247 | *color_fg |= 0xFF000000; // opaque
248 |
249 | // backgound color
250 | if ((colors.bgIndex<=7) && (colors.bgIntensity==1)) { // ANSI palette with increased intensity
251 | colors.bgIndex |= 0x8;
252 | } else if ((colors.bgIndex<=15) && (colors.bgIntensity!=1)) { // ANSI palette with standard/decreased intensity
253 | colors.bgIndex &= 0x7;
254 | }
255 | if (colors.bgTrueColorFlag) {
256 | *color_bg = colors.bgTrueColor;
257 | } else {
258 | if ((colors.bgIndex<=7) && (colors.bgIntensity==2)) { // "ANSI" palette with decreased intensity
259 | *color_bg = DARK_COLORS_BGR[colors.bgIndex];
260 | } else { // ANSI/VTERM/GREYSCALE palette
261 | *color_bg = ANSI_COLORS_BGR[colors.bgIndex];
262 | }
263 | }
264 | *color_bg |= 0xFF000000; // opaque
265 | }
266 |
267 | /*
268 | * Parse CSI sequences
269 | */
270 | static size_t psvDebugScreenEscape(const unsigned char *str) {
271 | unsigned int i, argc, arg[32] = { 0 };
272 | unsigned int c;
273 | uint32_t unit, mode;
274 | int *colorTrueColorFlag;
275 | uint32_t *colorTrueColor;
276 | unsigned char *colorIndex, *colorIntensity;
277 | for (i = 0, argc = 0; (argc < (sizeof(arg)/sizeof(*arg))) && (str[i] != '\0'); i++) {
278 | switch (str[i]) {
279 | // numeric char
280 | case '0':
281 | case '1':
282 | case '2':
283 | case '3':
284 | case '4':
285 | case '5':
286 | case '6':
287 | case '7':
288 | case '8':
289 | case '9':
290 | arg[argc] = (arg[argc] * 10) + (str[i] - '0');
291 | continue;
292 | // argument separator
293 | case ';': argc++; continue;
294 | // CSI commands
295 | // save/restore position
296 | case 's':
297 | if (arg[0]size_h; return i;
304 | case 'B': coordY += arg[0] * (F)->size_h; return i;
305 | case 'C': coordX += arg[0] * (F)->size_w; return i;
306 | case 'D': coordX -= arg[0] * (F)->size_w; return i;
307 | // cursor movement to beginning of next/previous line(s)
308 | case 'E': coordY += arg[0] * (F)->size_h; coordX = 0; return i;
309 | case 'F': coordY -= arg[0] * (F)->size_h; coordX = 0; return i;
310 | // cursor positioning
311 | case 'G': coordX = (arg[0]-1) * (F)->size_w; return i;
312 | case 'H':
313 | case 'f':
314 | coordY = (arg[0]-1) * (F)->size_h;
315 | coordX = (arg[1]-1) * (F)->size_w;
316 | return i;
317 | // clear part of "J"=screen or "K"=Line, so J code re-uses part of K
318 | case 'J':
319 | case 'K':
320 | if (arg[0]==0) { // from cursor to end of line/screen
321 | CLEARSCRNBLOCK(coordY, coordY + (F)->size_h, coordX, (SCREEN_WIDTH), colors.color_bg); // line
322 | if (str[i]=='J') CLEARSCRNLINES(coordY + (F)->size_h, (SCREEN_HEIGHT), colors.color_bg); // screen
323 | } else if (arg[0]==1) { // from beginning of line/screen to cursor
324 | CLEARSCRNBLOCK(coordY, coordY + (F)->size_h, 0, coordX, colors.color_bg); // line
325 | if (str[i]=='J') CLEARSCRNLINES(0, coordY, colors.color_bg); // screen
326 | } else if (arg[0]==2) { // whole line/screen
327 | if (str[i]=='K') CLEARSCRNLINES(coordY, coordY + (F)->size_h, colors.color_bg) // line
328 | else if (str[i]=='J') CLEARSCRNLINES(0, (SCREEN_HEIGHT), colors.color_bg); // screen
329 | }
330 | return i;
331 | // color
332 | case 'm':
333 | for (c = 0; c <= argc; c++) {
334 | switch (arg[c]) {
335 | // reset all
336 | case 0:
337 | psvDebugScreenResetFgColor();
338 | psvDebugScreenResetBgColor();
339 | psvDebugScreenResetInversion();
340 | continue;
341 | break;
342 | // intensity
343 | case 1: // increased = "bright" color
344 | case 2: // decreased = "dark" color
345 | case 22: // standard = "normal" color
346 | colors.fgIntensity = arg[c];
347 | continue;
348 | break;
349 | // inversion
350 | case 7: // enable
351 | colors.inversion = 1;
352 | continue;
353 | break;
354 | case 27: // disable
355 | colors.inversion = 0;
356 | continue;
357 | break;
358 | // set from color map or truecolor
359 | case 38: // foreground color
360 | case 48: // background color
361 | mode = arg[c] / 10;
362 | colorTrueColorFlag = mode&1 ? &colors.fgTrueColorFlag : &colors.bgTrueColorFlag;
363 | if (arg[c+1]==5) { // 8-bit: [0-15][16-231][232-255] color map
364 | *colorTrueColorFlag = 0;
365 | colorIndex = mode&1 ? &colors.fgIndex : &colors.bgIndex;
366 | *colorIndex = arg[c+2] & 0xFF;
367 | colorIntensity = mode&1 ? &colors.fgIntensity : &colors.bgIntensity;
368 | *colorIntensity = ((*colorIndex>=8) && (*colorIndex<=15)) ? 1 : 22;
369 | c+=2; // extra arguments
370 | } else if (arg[c+1]==2) { // 24-bit color space
371 | *colorTrueColorFlag = 1;
372 | colorTrueColor = mode&1 ? &colors.fgTrueColor : &colors.bgTrueColor;
373 | *colorTrueColor = FROM_FULL_RGB(arg[c+2], arg[c+3], arg[c+4]);
374 | c+=4; // extra arguments
375 | }
376 | continue;
377 | break;
378 | // default color
379 | case 39: // foreground color
380 | psvDebugScreenResetFgColor();
381 | continue;
382 | break;
383 | case 49: // background color
384 | psvDebugScreenResetBgColor();
385 | continue;
386 | break;
387 | // custom color reset
388 | default:
389 | // ANSI colors (30-37, 40-47, 90-97, 100-107)
390 | mode = arg[c] / 10;
391 | if ((mode!=3) && (mode!=4) && (mode!=9) && (mode!=10)) continue; // skip unsupported modes
392 | unit = arg[c] % 10;
393 | if (unit>7) continue; // skip unsupported modes
394 | colorTrueColorFlag = mode&1 ? &colors.fgTrueColorFlag : &colors.bgTrueColorFlag;
395 | *colorTrueColorFlag = 0;
396 | colorIndex = mode&1 ? &colors.fgIndex : &colors.bgIndex;
397 | *colorIndex = unit;
398 | colorIntensity = mode&1 ? &colors.fgIntensity : &colors.bgIntensity;
399 | *colorIntensity = mode&8 ? 1 : 22;
400 | break;
401 | }
402 | }
403 | psvDebugScreenSetColors();
404 | return i;
405 | }
406 | }
407 | return 0;
408 | }
409 |
410 | /*
411 | * Initialize debug screen
412 | */
413 | int psvDebugScreenInit() {
414 | psvDebugScreenResetFgColor();
415 | psvDebugScreenResetBgColor();
416 | psvDebugScreenResetInversion();
417 | psvDebugScreenSetColors();
418 |
419 | #ifdef NO_psvDebugScreenInit
420 | psvDebugScreenInitReplacement();
421 | initialized = 1;
422 | return 0; // avoid linking non-initializer (prx) with sceDisplay/sceMemory
423 | #else
424 | mutex = sceKernelCreateMutex("log_mutex", 0, 0, NULL);
425 | displayblock = sceKernelAllocMemBlock("display", SCE_KERNEL_MEMBLOCK_TYPE_USER_CDRAM_RW, (SCREEN_FB_SIZE), NULL);
426 | if (displayblock < 0)
427 | return displayblock;
428 | sceKernelGetMemBlockBase(displayblock, (void**)&base);
429 | SceDisplayFrameBuf frame = { sizeof(frame), base, (SCREEN_FB_WIDTH), 0, (SCREEN_WIDTH), (SCREEN_HEIGHT) };
430 | initialized = 1;
431 | return sceDisplaySetFrameBuf(&frame, SCE_DISPLAY_SETBUF_NEXTFRAME);
432 | #endif
433 | }
434 |
435 | /*
436 | * Finalize debug screen
437 | */
438 | int psvDebugScreenFinish() {
439 | if (!initialized)
440 | return -1;
441 |
442 | initialized = 0;
443 |
444 | #ifdef NO_psvDebugScreenInit
445 | return 0;
446 | #else
447 | sceKernelDeleteMutex(mutex);
448 | sceDisplaySetFrameBuf(NULL, SCE_DISPLAY_SETBUF_IMMEDIATE);
449 | return sceKernelFreeMemBlock(displayblock);
450 | #endif
451 | }
452 |
/*
 * Registered through the compiler's destructor attribute so that the debug
 * screen is finalized without an explicit call; the result of
 * psvDebugScreenFinish() is deliberately ignored.
 */
__attribute__((destructor)) static void psvDebugScreenDestructor()
{
    (void)psvDebugScreenFinish();
}
456 |
457 | /*
458 | * Draw text onto debug screen
459 | */
460 | int psvDebugScreenPuts(const char * _text) {
461 | const unsigned char*text = (const unsigned char*)_text;
462 | int c;
463 | unsigned char t;
464 | unsigned char drawDummy;
465 | //
466 | uint32_t *vram;
467 | int bits_per_glyph = ((F)->width * (F)->height);
468 | int bitmap_offset;
469 | unsigned char *font;
470 | int row;
471 | int max_row;
472 | int col;
473 | unsigned char mask;
474 | uint32_t *pixel;
475 |
476 | sceKernelLockMutex(mutex, 1, NULL);
477 | for (c = 0; text[c] ; c++) {
478 | t = text[c];
479 | // handle CSI sequence
480 | if ((t == '\e') && (text[c+1] == '[')) {
481 | c += psvDebugScreenEscape(text + c + 2) + 2;
482 | if (coordX < 0) coordX = 0; // CSI position are 1-based,
483 | if (coordY < 0) coordY = 0; // prevent 0-based coordinate from producing a negative X/Y
484 | continue;
485 | }
486 | // handle non-printable characters #1 (line-dependent codes)
487 | if (t == '\n') {
488 | coordX = 0;
489 | coordY += (F)->size_h;
490 | continue;
491 | }
492 | if (t == '\r') {
493 | coordX = 0;
494 | continue;
495 | }
496 | // check if glyph fits in line
497 | if ((coordX + (F)->width) > (SCREEN_WIDTH)) {
498 | coordY += (F)->size_h;
499 | coordX = 0;
500 | }
501 | // check if glyph fits in screen
502 | if ((coordY + (F)->height) > (SCREEN_HEIGHT)) {
503 | coordX = coordY = 0;
504 | }
505 | // handle non-printable characters #2
506 | if (t == '\t') {
507 | coordX += (SCREEN_TAB_W) - (coordX % (SCREEN_TAB_W));
508 | continue;
509 | }
510 |
511 | // draw glyph or dummy glyph (dotted line in the middle)
512 | // works also with not byte-aligned glyphs
513 | vram = ((uint32_t*)base) + coordX + (coordY * (SCREEN_FB_WIDTH));
514 | row = 0;
515 | // check if glyph is available in font
516 | if ((t > (F)->last) || (t < (F)->first)) {
517 | drawDummy = 1;
518 | bitmap_offset = 0;
519 | font = NULL;
520 | mask = 1 << 7;
521 | } else {
522 | drawDummy = 0;
523 | bitmap_offset = (t - (F)->first) * bits_per_glyph;
524 | font = &(F)->glyphs[ (bitmap_offset / 8) ];
525 | mask = 1 << 7;
526 | for (col = (bitmap_offset % 8); col > 0; col--, mask >>= 1);
527 | }
528 | // special case: dummy glyph, clear to middle height
529 | max_row = 0;
530 | if (drawDummy) {
531 | max_row = (F)->height / 2;
532 | for (; row < max_row; row++, vram += (SCREEN_FB_WIDTH)) {
533 | pixel = vram;
534 | col = 0;
535 | for (; col < (F)->size_w ; col++) {
536 | *pixel++ = colors.color_bg;
537 | }
538 | }
539 | }
540 | // draw font glyph or dummy glyph
541 | if (drawDummy) {
542 | max_row++;
543 | if (max_row > (F)->height) max_row = (F)->height;
544 | } else {
545 | max_row = (F)->height;
546 | }
547 | for (; row < max_row; row++, vram += (SCREEN_FB_WIDTH)) {
548 | pixel = vram;
549 | col = 0;
550 | for (; col < (F)->width ; col++, mask >>= 1) {
551 | if (drawDummy) {
552 | *pixel++ = (col&1) ? colors.color_fg : colors.color_bg;
553 | } else {
554 | if (!mask) { font++; mask = 1 << 7; } // no more bits: we exhausted this byte
555 | *pixel++ = (*font&mask) ? colors.color_fg : colors.color_bg;
556 | }
557 | }
558 | // right margin
559 | for (; col < (F)->size_w ; col++)
560 | *pixel++ = colors.color_bg;
561 | }
562 | // draw bottom margin
563 | max_row = (F)->size_h;
564 | for (; row < (F)->size_h; row++, vram += (SCREEN_FB_WIDTH))
565 | for (pixel = vram, col = 0; col < (F)->size_w ; col++)
566 | *pixel++ = colors.color_bg;
567 | // advance X position
568 | coordX += (F)->size_w;
569 | }
570 | sceKernelUnlockMutex(mutex, 1);
571 | return c;
572 | }
573 |
574 |
/*
 * Printf text onto debug screen
 *
 * Formats into a 4096 byte buffer and draws the result with
 * psvDebugScreenPuts(); longer output is truncated to fit the buffer.
 *
 * Returns the value reported by vsnprintf(): the length the complete output
 * would have had, or a negative value on a formatting error. On error nothing
 * is drawn, because the buffer contents are not usable in that case.
 */
__attribute__((__format__ (__printf__, 1, 2)))
int psvDebugScreenPrintf(const char *format, ...) {
    char buf[4096];
    va_list opt;
    int ret;

    va_start(opt, format);
    ret = vsnprintf(buf, sizeof(buf), format, opt);
    va_end(opt);

    if (ret < 0)
        return ret;

    psvDebugScreenPuts(buf);
    return ret;
}
590 |
591 | /*
592 | * Return copy of color state
593 | */
594 | void psvDebugScreenGetColorStateCopy(ColorState *copy) {
595 | if (copy) {
596 | memcpy(copy, &colors, sizeof(ColorState));
597 | CONVERT_RGB_BGR(copy->fgTrueColor);
598 | CONVERT_RGB_BGR(copy->bgTrueColor);
599 | CONVERT_RGB_BGR(copy->color_fg);
600 | CONVERT_RGB_BGR(copy->color_bg);
601 | }
602 | }
603 |
604 | /*
605 | * Return copy of pixel coordinates
606 | */
607 | void psvDebugScreenGetCoordsXY(int *x, int *y) {
608 | if (x) *x = coordX;
609 | if (y) *y = coordY;
610 | }
611 |
612 | /*
613 | * Set pixel coordinates
614 | */
615 | void psvDebugScreenSetCoordsXY(int *x, int *y) {
616 | if (x) {
617 | coordX = *x;
618 | if (coordX < 0) coordX = 0;
619 | }
620 | if (y) {
621 | coordY = *y;
622 | if (coordY < 0) coordY = 0;
623 | }
624 | }
625 |
626 | /*
627 | * Return pointer to current font
628 | */
629 | PsvDebugScreenFont *psvDebugScreenGetFont(void) {
630 | return F;
631 | }
632 |
633 | /*
634 | * Set font
635 | */
636 | PsvDebugScreenFont *psvDebugScreenSetFont(PsvDebugScreenFont *font) {
637 | if ((font) && (font->glyphs)) F = font;
638 | return F;
639 | }
640 |
641 | /*
642 | * Return scaled-by-2 copy of font
643 | */
644 | PsvDebugScreenFont *psvDebugScreenScaleFont2x(PsvDebugScreenFont *source_font) {
645 | // works also with not byte-aligned glyphs
646 | PsvDebugScreenFont *target_font;
647 | size_t size;
648 | size_t align;
649 | int glyph;
650 | int row;
651 | int col;
652 | int count;
653 | unsigned char *source_bitmap;
654 | unsigned char source_mask;
655 | unsigned char *target_bitmap, *target_bitmap2;
656 | unsigned char target_mask, target_mask2;
657 | int target_next_row_bytes, target_next_row_bits;
658 | unsigned char pixel;
659 |
660 | if (!source_font) return NULL;
661 |
662 | // allocate target structure and bitmap
663 | target_font = (PsvDebugScreenFont *)malloc(sizeof(PsvDebugScreenFont));
664 | memset(target_font, 0, sizeof(PsvDebugScreenFont));
665 | // copy and scale meta information
666 | target_font->width = 2 * source_font->width;
667 | target_font->height = 2 * source_font->height;
668 | target_font->first = source_font->first;
669 | target_font->last = source_font->last;
670 | target_font->size_w = 2 * source_font->size_w;
671 | target_font->size_h = 2 * source_font->size_h;
672 |
673 | // calculate size of target bitmap
674 | size = target_font->width * target_font->height * (target_font->last - target_font->first + 1);
675 | if (size <= 0) {
676 | free(target_font);
677 | return NULL;
678 | }
679 | align = size % 8;
680 | size /= 8;
681 | if (align) size++;
682 |
683 | // allocate and initialize target bitmap
684 | target_font->glyphs = (unsigned char *)malloc(size);
685 | memset(target_font->glyphs, 0, size);
686 |
687 | // scale source bitmap and store in target bitmap
688 | source_bitmap = source_font->glyphs;
689 | source_mask = 1 << 7;
690 | //
691 | target_bitmap = target_font->glyphs;
692 | target_mask = 1 << 7;
693 | target_next_row_bytes = target_font->width / 8;
694 | target_next_row_bits = target_font->width % 8;
695 | //
696 | for (glyph = source_font->first; glyph <= source_font->last; glyph++) {
697 | for (row = source_font->height; row > 0; row--) {
698 | // Find beginning of next target row
699 | target_bitmap2 = target_bitmap + target_next_row_bytes; // advance full bytes
700 | target_mask2 = target_mask; // advance remaining bits
701 | for (col = target_next_row_bits; col > 0; col--, target_mask2 >>= 1) {
702 | if (!target_mask2) { target_bitmap2++; target_mask2 = 1 << 7; } // no more bits: we advance to the next target byte
703 | }
704 | // Get pixel from source bitmap
705 | for (col = source_font->width; col > 0; col--, source_mask >>= 1) {
706 | if (!source_mask) { source_bitmap++; source_mask = 1 << 7; } // no more bits: we advance to the next source byte
707 | pixel = *source_bitmap & source_mask;
708 | // Put pixels into target bitmap
709 | for (count = 2; count > 0; count--) {
710 | // duplicate column in origial row
711 | if (!target_mask) { target_bitmap++; target_mask = 1 << 7; } // no more bits: we advance to the next target byte
712 | if (pixel) *target_bitmap |= target_mask;
713 | target_mask >>= 1;
714 | // duplicate column in duplicated row
715 | if (!target_mask2) { target_bitmap2++; target_mask2 = 1 << 7; } // no more bits: we advance to the next target byte
716 | if (pixel) *target_bitmap2 |= target_mask2;
717 | target_mask2 >>= 1;
718 | }
719 | }
720 | // Next target row is directly behind duplicated row
721 | target_bitmap = target_bitmap2;
722 | target_mask = target_mask2;
723 | }
724 | }
725 |
726 | return target_font;
727 | }
728 |
729 | #undef SCREEN_TAB_W
730 | #undef F
731 |
732 | #endif
733 |
--------------------------------------------------------------------------------
/vita_llm.c:
--------------------------------------------------------------------------------
1 | /* Inference for Llama-2 Transformer model in pure C */
2 |
3 | #include
4 | #include
5 | #include
6 | #include
7 | #include
8 | #include
9 | #include
10 | #include
11 | #include
12 | #include
13 | #include
14 | #include
15 | #include
16 | #include
17 | #include
18 | #include
19 | #include "../common/debugScreen.h"
20 | #if defined _WIN32
21 | #include "win.h"
22 | #else
23 |
24 | #endif
25 |
26 | // colors
27 | #define COLOR_RESET "\x1b[0m"
28 | #define COLOR_RED "\x1b[31m"
29 | #define COLOR_GREEN "\x1b[32m"
30 | #define COLOR_YELLOW "\x1b[33m"
31 | #define COLOR_CYAN "\x1b[36m"
32 |
33 | // Fwd declaration
34 | void download_file(const char* url, const char* filepath);
35 |
36 | // ----------------------------------------------------------------------------
37 | // Transformer model
38 |
// One selectable model bundle: a checkpoint together with the tokenizer it needs.
typedef struct {
    char *model_path;      // file opened to detect/load the model checkpoint
    char *tokenizer_path;  // file opened to detect/load the tokenizer
    char *name;            // label printed for this entry in the model menu
    char *model_url;       // presumably the download source for model_path (confirm at call sites)
    char *tokenizer_url;   // presumably the download source for tokenizer_path (confirm at call sites)
    char *tokenizer_name;  // presumably a label for the tokenizer file (usage not visible here)
} ModelSuite;
47 |
// Hyperparameters of the network. This struct is read verbatim from the start
// of the checkpoint file, so the field order and types define the file header.
typedef struct {
    int dim;        // width of the transformer (size of one activation vector)
    int hidden_dim; // width of the feed-forward hidden layer
    int n_layers;   // how many transformer layers are stacked
    int n_heads;    // count of query heads
    int n_kv_heads; // count of key/value heads (may be fewer than n_heads with multiquery)
    int vocab_size; // number of tokens, usually 256 (byte-level)
    int seq_len;    // longest sequence the model can process
} Config;
57 |
// Pointers into the loaded checkpoint, one per weight tensor.
// Shapes are given as (rows, cols); note dim == n_heads * head_size.
typedef struct {
    // embedding lookup
    float *token_embedding_table; // (vocab_size, dim)
    // rmsnorm gains
    float *rms_att_weight;        // (layer, dim), applied before attention
    float *rms_ffn_weight;        // (layer, dim), applied before the feed-forward block
    // attention projections
    float *wq;                    // (layer, dim, n_heads * head_size)
    float *wk;                    // (layer, dim, n_kv_heads * head_size)
    float *wv;                    // (layer, dim, n_kv_heads * head_size)
    float *wo;                    // (layer, n_heads * head_size, dim)
    // feed-forward projections
    float *w1;                    // (layer, hidden_dim, dim)
    float *w2;                    // (layer, dim, hidden_dim)
    float *w3;                    // (layer, hidden_dim, dim)
    // gain of the last rmsnorm
    float *rms_final_weight;      // (dim,)
    // classifier that produces the logits; may alias token_embedding_table
    float *wcls;
} TransformerWeights;
78 |
// Scratch buffers used while pushing one token through the network.
typedef struct {
    // activations for the token currently being processed
    float *x;           // (dim,) running activation
    float *xb;          // (dim,) activation inside a residual branch
    float *xb2;         // (dim,) spare buffer of the same size
    float *hb;          // (hidden_dim,) feed-forward hidden buffer
    float *hb2;         // (hidden_dim,) second feed-forward hidden buffer
    float *q;           // (dim,) query vector
    float *k;           // key vector; forward() points this into key_cache
    float *v;           // value vector; forward() points this into value_cache
    float *att;         // (n_heads, seq_len) attention scores / weights
    float *logits;      // (vocab_size,) output logits
    // keys and values remembered for every earlier position
    float *key_cache;   // (layer, seq_len, kv_dim)
    float *value_cache; // (layer, seq_len, kv_dim)
} RunState;
95 |
96 | typedef struct {
97 | Config config; // the hyperparameters of the architecture (the blueprint)
98 | TransformerWeights weights; // the weights of the model
99 | RunState state; // buffers for the "wave" of activations in the forward pass
100 | // some more state needed to properly clean up the memory mapping (sigh)
101 | int fd; // file descriptor for memory mapping
102 | float* data; // memory mapped data pointer
103 | ssize_t file_size; // size of the checkpoint file in bytes
104 | } Transformer;
105 |
106 | void malloc_run_state(RunState* s, Config* p) {
107 | // we calloc instead of malloc to keep valgrind happy
108 | int kv_dim = (p->dim * p->n_kv_heads) / p->n_heads;
109 | s->x = calloc(p->dim, sizeof(float));
110 | s->xb = calloc(p->dim, sizeof(float));
111 | s->xb2 = calloc(p->dim, sizeof(float));
112 | s->hb = calloc(p->hidden_dim, sizeof(float));
113 | s->hb2 = calloc(p->hidden_dim, sizeof(float));
114 | s->q = calloc(p->dim, sizeof(float));
115 | s->key_cache = calloc(p->n_layers * p->seq_len * kv_dim, sizeof(float));
116 | s->value_cache = calloc(p->n_layers * p->seq_len * kv_dim, sizeof(float));
117 | s->att = calloc(p->n_heads * p->seq_len, sizeof(float));
118 | s->logits = calloc(p->vocab_size, sizeof(float));
119 | // ensure all mallocs went fine
120 | if (!s->x || !s->xb || !s->xb2 || !s->hb || !s->hb2 || !s->q
121 | || !s->key_cache || !s->value_cache || !s->att || !s->logits) {
122 | psvDebugScreenPrintf("malloc failed!\n");
123 | sceKernelExitProcess(0);
124 | }
125 | }
126 |
127 | void free_run_state(RunState* s) {
128 | free(s->x);
129 | free(s->xb);
130 | free(s->xb2);
131 | free(s->hb);
132 | free(s->hb2);
133 | free(s->q);
134 | free(s->att);
135 | free(s->logits);
136 | free(s->key_cache);
137 | free(s->value_cache);
138 | }
139 |
140 | void memory_map_weights(TransformerWeights *w, Config* p, float* ptr, int shared_weights) {
141 | int head_size = p->dim / p->n_heads;
142 | // make sure the multiplications below are done in 64bit to fit the parameter counts of 13B+ models
143 | unsigned long long n_layers = p->n_layers;
144 | w->token_embedding_table = ptr;
145 | ptr += p->vocab_size * p->dim;
146 | w->rms_att_weight = ptr;
147 | ptr += n_layers * p->dim;
148 | w->wq = ptr;
149 | ptr += n_layers * p->dim * (p->n_heads * head_size);
150 | w->wk = ptr;
151 | ptr += n_layers * p->dim * (p->n_kv_heads * head_size);
152 | w->wv = ptr;
153 | ptr += n_layers * p->dim * (p->n_kv_heads * head_size);
154 | w->wo = ptr;
155 | ptr += n_layers * (p->n_heads * head_size) * p->dim;
156 | w->rms_ffn_weight = ptr;
157 | ptr += n_layers * p->dim;
158 | w->w1 = ptr;
159 | ptr += n_layers * p->dim * p->hidden_dim;
160 | w->w2 = ptr;
161 | ptr += n_layers * p->hidden_dim * p->dim;
162 | w->w3 = ptr;
163 | ptr += n_layers * p->dim * p->hidden_dim;
164 | w->rms_final_weight = ptr;
165 | ptr += p->dim;
166 | ptr += p->seq_len * head_size / 2; // skip what used to be freq_cis_real (for RoPE)
167 | ptr += p->seq_len * head_size / 2; // skip what used to be freq_cis_imag (for RoPE)
168 | w->wcls = shared_weights ? w->token_embedding_table : ptr;
169 | }
170 |
171 | void read_checkpoint(char* checkpoint, Config* config, TransformerWeights* weights,
172 | int* fd, float** data, ssize_t* file_size) {
173 | psvDebugScreenPrintf(COLOR_CYAN "Reading checkpoint: %s\n" COLOR_RESET, checkpoint);
174 | FILE *file = fopen(checkpoint, "rb");
175 | if (!file) { psvDebugScreenPrintf(COLOR_RED "Error: Couldn't open file %s\n" COLOR_RESET, checkpoint); sceKernelExitProcess(0); }
176 | psvDebugScreenPrintf(COLOR_CYAN "File opened. Reading config...\n" COLOR_RESET);
177 | // read in the config header
178 | if (fread(config, sizeof(Config), 1, file) != 1) { sceKernelExitProcess(0); }
179 | // negative vocab size is hacky way of signaling unshared weights. bit yikes.
180 | int shared_weights = config->vocab_size > 0 ? 1 : 0;
181 | config->vocab_size = abs(config->vocab_size);
182 | // figure out the file size
183 | fseek(file, 0, SEEK_END); // move file pointer to end of file
184 | *file_size = ftell(file); // get the file size, in bytes
185 | fseek(file, 0, SEEK_SET); // rewind to beginning of file
186 | psvDebugScreenPrintf("Model file size: %ld bytes.\n", *file_size);
187 | // allocate memory for the weights
188 | psvDebugScreenPrintf(COLOR_CYAN "Attempting to allocate memory for weights...\n" COLOR_RESET);
189 | sceKernelDelayThread(500 * 1000);
190 | *data = (float*)malloc(*file_size);
191 | if (!*data) {
192 | psvDebugScreenPrintf(COLOR_RED "FATAL: malloc failed for weights!\n" COLOR_RESET);
193 | while(1) { sceKernelDelayThread(1000 * 1000); }
194 | }
195 | psvDebugScreenPrintf(COLOR_GREEN "Memory allocated successfully.\n" COLOR_RESET);
196 | // read in the weights
197 | psvDebugScreenPrintf(COLOR_CYAN "Reading weights...\n" COLOR_RESET);
198 | if (fread(*data, 1, *file_size, file) != *file_size) {
199 | psvDebugScreenPrintf(COLOR_RED "Error: failed to read weights!\n" COLOR_RESET);
200 | sceKernelExitProcess(0);
201 | }
202 | fclose(file);
203 | psvDebugScreenPrintf(COLOR_CYAN "Weights read. Mapping weights...\n" COLOR_RESET);
204 | // memory map the Transformer weights into the data pointer
205 | float* weights_ptr = *data + sizeof(Config)/sizeof(float);
206 | memory_map_weights(weights, config, weights_ptr, shared_weights);
207 | psvDebugScreenPrintf(COLOR_GREEN "Checkpoint loaded successfully.\n" COLOR_RESET);
208 | }
209 |
210 | void build_transformer(Transformer *t, char* checkpoint_path) {
211 | // read in the Config and the Weights from the checkpoint
212 | read_checkpoint(checkpoint_path, &t->config, &t->weights, &t->fd, &t->data, &t->file_size);
213 | // allocate the RunState buffers
214 | malloc_run_state(&t->state, &t->config);
215 | }
216 |
217 | void free_transformer(Transformer* t) {
218 | // free the weights
219 | if (t->data) { free(t->data); }
220 | // free the RunState buffers
221 | free_run_state(&t->state);
222 | }
223 |
224 | // ----------------------------------------------------------------------------
225 | // neural net blocks; the dynamics of the Transformer
226 |
227 | void rmsnorm(float* o, float* x, float* weight, int size) {
228 | // calculate sum of squares
229 | float ss = 0.0f;
230 | for (int j = 0; j < size; j++) {
231 | ss += x[j] * x[j];
232 | }
233 | ss /= size;
234 | ss += 1e-5f;
235 | ss = 1.0f / sqrtf(ss);
236 | // normalize and scale
237 | for (int j = 0; j < size; j++) {
238 | o[j] = weight[j] * (ss * x[j]);
239 | }
240 | }
241 |
242 | void softmax(float* x, int size) {
243 | // find max value (for numerical stability)
244 | float max_val = x[0];
245 | for (int i = 1; i < size; i++) {
246 | if (x[i] > max_val) {
247 | max_val = x[i];
248 | }
249 | }
250 | // exp and sum
251 | float sum = 0.0f;
252 | for (int i = 0; i < size; i++) {
253 | x[i] = expf(x[i] - max_val);
254 | sum += x[i];
255 | }
256 | // normalize
257 | for (int i = 0; i < size; i++) {
258 | x[i] /= sum;
259 | }
260 | }
261 |
// Matrix-vector product: xout (d,) = W (d,n) @ x (n,), with W stored row-major.
// This is where the program spends by far most of its time.
void matmul(float* xout, float* x, float* w, int n, int d) {
    const float *row = w; // start of the row of W currently being reduced
    for (int out = 0; out < d; out++, row += n) {
        float acc = 0.0f;
        for (int in = 0; in < n; in++) {
            acc += row[in] * x[in];
        }
        xout[out] = acc;
    }
}
275 |
276 | float* forward(Transformer* transformer, int token, int pos) {
277 |
278 | // a few convenience variables
279 | Config* p = &transformer->config;
280 | TransformerWeights* w = &transformer->weights;
281 | RunState* s = &transformer->state;
282 | float *x = s->x;
283 | int dim = p->dim;
284 | int kv_dim = (p->dim * p->n_kv_heads) / p->n_heads;
285 | int kv_mul = p->n_heads / p->n_kv_heads; // integer multiplier of the kv sharing in multiquery
286 | int hidden_dim = p->hidden_dim;
287 | int head_size = dim / p->n_heads;
288 |
289 | // copy the token embedding into x
290 | float* content_row = w->token_embedding_table + token * dim;
291 | memcpy(x, content_row, dim*sizeof(*x));
292 |
293 | // forward all the layers
294 | for(unsigned long long l = 0; l < p->n_layers; l++) {
295 |
296 | // attention rmsnorm
297 | rmsnorm(s->xb, x, w->rms_att_weight + l*dim, dim);
298 |
299 | // key and value point to the kv cache
300 | int loff = l * p->seq_len * kv_dim; // kv cache layer offset for convenience
301 | s->k = s->key_cache + loff + pos * kv_dim;
302 | s->v = s->value_cache + loff + pos * kv_dim;
303 |
304 | // qkv matmuls for this position
305 | matmul(s->q, s->xb, w->wq + l*dim*dim, dim, dim);
306 | matmul(s->k, s->xb, w->wk + l*dim*kv_dim, dim, kv_dim);
307 | matmul(s->v, s->xb, w->wv + l*dim*kv_dim, dim, kv_dim);
308 |
309 | // RoPE relative positional encoding: complex-valued rotate q and k in each head
310 | for (int i = 0; i < dim; i+=2) {
311 | int head_dim = i % head_size;
312 | float freq = 1.0f / powf(10000.0f, head_dim / (float)head_size);
313 | float val = pos * freq;
314 | float fcr = cosf(val);
315 | float fci = sinf(val);
316 | int rotn = i < kv_dim ? 2 : 1; // how many vectors? 2 = q & k, 1 = q only
317 | for (int v = 0; v < rotn; v++) {
318 | float* vec = v == 0 ? s->q : s->k; // the vector to rotate (query or key)
319 | float v0 = vec[i];
320 | float v1 = vec[i+1];
321 | vec[i] = v0 * fcr - v1 * fci;
322 | vec[i+1] = v0 * fci + v1 * fcr;
323 | }
324 | }
325 |
326 | // multihead attention. iterate over all heads
327 | int h;
328 | // #pragma omp parallel for private(h)
329 | for (h = 0; h < p->n_heads; h++) {
330 | // get the query vector for this head
331 | float* q = s->q + h * head_size;
332 | // attention scores for this head
333 | float* att = s->att + h * p->seq_len;
334 | // iterate over all timesteps, including the current one
335 | for (int t = 0; t <= pos; t++) {
336 | // get the key vector for this head and at this timestep
337 | float* k = s->key_cache + loff + t * kv_dim + (h / kv_mul) * head_size;
338 | // calculate the attention score as the dot product of q and k
339 | float score = 0.0f;
340 | for (int i = 0; i < head_size; i++) {
341 | score += q[i] * k[i];
342 | }
343 | score /= sqrtf(head_size);
344 | // save the score to the attention buffer
345 | att[t] = score;
346 | }
347 |
348 | // softmax the scores to get attention weights, from 0..pos inclusively
349 | softmax(att, pos + 1);
350 |
351 | // weighted sum of the values, store back into xb
352 | float* xb = s->xb + h * head_size;
353 | memset(xb, 0, head_size * sizeof(float));
354 | for (int t = 0; t <= pos; t++) {
355 | // get the value vector for this head and at this timestep
356 | float* v = s->value_cache + loff + t * kv_dim + (h / kv_mul) * head_size;
357 | // get the attention weight for this timestep
358 | float a = att[t];
359 | // accumulate the weighted value into xb
360 | for (int i = 0; i < head_size; i++) {
361 | xb[i] += a * v[i];
362 | }
363 | }
364 | }
365 |
366 | // final matmul to get the output of the attention
367 | matmul(s->xb2, s->xb, w->wo + l*dim*dim, dim, dim);
368 |
369 | // residual connection back into x
370 | for (int i = 0; i < dim; i++) {
371 | x[i] += s->xb2[i];
372 | }
373 |
374 | // ffn rmsnorm
375 | rmsnorm(s->xb, x, w->rms_ffn_weight + l*dim, dim);
376 |
377 | // Now for FFN in PyTorch we have: self.w2(F.silu(self.w1(x)) * self.w3(x))
378 | // first calculate self.w1(x) and self.w3(x)
379 | matmul(s->hb, s->xb, w->w1 + l*dim*hidden_dim, dim, hidden_dim);
380 | matmul(s->hb2, s->xb, w->w3 + l*dim*hidden_dim, dim, hidden_dim);
381 |
382 | // SwiGLU non-linearity
383 | for (int i = 0; i < hidden_dim; i++) {
384 | float val = s->hb[i];
385 | // silu(x)=x*σ(x), where σ(x) is the logistic sigmoid
386 | val *= (1.0f / (1.0f + expf(-val)));
387 | // elementwise multiply with w3(x)
388 | val *= s->hb2[i];
389 | s->hb[i] = val;
390 | }
391 |
392 | // final matmul to get the output of the ffn
393 | matmul(s->xb, s->hb, w->w2 + l*dim*hidden_dim, hidden_dim, dim);
394 |
395 | // residual connection
396 | for (int i = 0; i < dim; i++) {
397 | x[i] += s->xb[i];
398 | }
399 | }
400 |
401 | // final rmsnorm
402 | rmsnorm(x, x, w->rms_final_weight, dim);
403 |
404 | // classifier into logits
405 | matmul(s->logits, x, w->wcls, p->dim, p->vocab_size);
406 | return s->logits;
407 | }
408 |
409 | // ----------------------------------------------------------------------------
410 | // The Byte Pair Encoding (BPE) Tokenizer that translates strings <-> tokens
411 |
// A vocabulary string paired with its token id; used for the sorted lookup table.
typedef struct {
    char *str; // the token text (sort/search key)
    int id;    // index of that text in the vocabulary
} TokenIndex;
416 |
417 | typedef struct {
418 | char** vocab;
419 | float* vocab_scores;
420 | TokenIndex *sorted_vocab;
421 | int vocab_size;
422 | unsigned int max_token_length;
423 | unsigned char byte_pieces[512]; // stores all single-byte strings
424 | } Tokenizer;
425 |
426 | int compare_tokens(const void *a, const void *b) {
427 | return strcmp(((TokenIndex*)a)->str, ((TokenIndex*)b)->str);
428 | }
429 |
430 | void build_tokenizer(Tokenizer* t, char* tokenizer_path, int vocab_size) {
431 | t->vocab_size = vocab_size;
432 | psvDebugScreenPrintf("Tokenizer vocab size: %d\n", vocab_size);
433 | // i should have written the vocab file as a simple text file, i forgive myself...
434 | t->vocab = (char**)malloc(vocab_size * sizeof(char*));
435 | t->vocab_scores = (float*)malloc(vocab_size * sizeof(float));
436 | t->sorted_vocab = NULL; // initialized lazily
437 | for (int i = 0; i < 256; i++) {
438 | t->byte_pieces[i * 2] = (unsigned char)i;
439 | t->byte_pieces[i * 2 + 1] = '\0';
440 | }
441 | // read in the file
442 | FILE *file = fopen(tokenizer_path, "rb");
443 | if (!file) { psvDebugScreenPrintf("couldn't load %s\n", tokenizer_path); sceKernelExitProcess(0); }
444 | if (fread(&t->max_token_length, sizeof(int), 1, file) != 1) { psvDebugScreenPrintf("failed read\n"); sceKernelExitProcess(0); }
445 | int len;
446 | for (int i = 0; i < vocab_size; i++) {
447 | if (fread(t->vocab_scores + i, sizeof(float), 1, file) != 1) { psvDebugScreenPrintf("failed read\n"); sceKernelExitProcess(0);}
448 | if (fread(&len, sizeof(int), 1, file) != 1) { psvDebugScreenPrintf("failed read\n"); sceKernelExitProcess(0); }
449 | t->vocab[i] = (char *)malloc(len + 1);
450 | if (fread(t->vocab[i], len, 1, file) != 1) { psvDebugScreenPrintf("failed read\n"); sceKernelExitProcess(0); }
451 | t->vocab[i][len] = '\0'; // add the string terminating token
452 | }
453 | fclose(file);
454 | }
455 |
456 | void free_tokenizer(Tokenizer* t) {
457 | for (int i = 0; i < t->vocab_size; i++) { free(t->vocab[i]); }
458 | free(t->vocab);
459 | free(t->vocab_scores);
460 | free(t->sorted_vocab);
461 | }
462 |
463 | char* decode(Tokenizer* t, int prev_token, int token) {
464 | char *piece = t->vocab[token];
465 | // following BOS (1) token, sentencepiece decoder strips any leading whitespace (see PR #89)
466 | if (prev_token == 1 && piece[0] == ' ') { piece++; }
467 | // careful, some tokens designate raw bytes, and look like e.g. '<0x01>'
468 | // parse this and convert and return the actual byte
469 | unsigned char byte_val;
470 | if (sscanf(piece, "<0x%02hhX>", &byte_val) == 1) {
471 | piece = (char*)t->byte_pieces + byte_val * 2;
472 | }
473 | return piece;
474 | }
475 |
// Print a decoded piece on the debug screen, dropping single raw bytes that are
// neither printable nor whitespace (control codes, backspace and the like).
void safe_printf(char *piece) {
    // nothing to print
    if (piece == NULL || piece[0] == '\0') {
        return;
    }
    // a one-character piece may be a raw byte token: filter it
    if (piece[1] == '\0') {
        const unsigned char only = piece[0];
        if (!isprint(only) && !isspace(only)) {
            return;
        }
    }
    psvDebugScreenPrintf("%s", piece);
}
489 |
490 | int str_lookup(char *str, TokenIndex *sorted_vocab, int vocab_size) {
491 | // efficiently find the perfect match for str in vocab, return its index or -1 if not found
492 | TokenIndex tok = { .str = str }; // acts as the key to search for
493 | TokenIndex *res = bsearch(&tok, sorted_vocab, vocab_size, sizeof(TokenIndex), compare_tokens);
494 | return res != NULL ? res->id : -1;
495 | }
496 |
497 | void encode(Tokenizer* t, char *text, int8_t bos, int8_t eos, int *tokens, int *n_tokens) {
498 | // encode the string text (input) into an upper-bound preallocated tokens[] array
499 | // bos != 0 means prepend the BOS token (=1), eos != 0 means append the EOS token (=2)
500 | if (text == NULL) { psvDebugScreenPrintf("cannot encode NULL text\n"); sceKernelExitProcess(0); }
501 |
502 | psvDebugScreenPrintf("Inside encode, vocab size: %d\n", t->vocab_size);
503 | if (t->sorted_vocab == NULL) {
504 | psvDebugScreenPrintf("Lazily sorting vocab...\n");
505 | // lazily sort the vocabulary
506 | t->sorted_vocab = malloc(t->vocab_size * sizeof(TokenIndex));
507 | for (int i = 0; i < t->vocab_size; i++) {
508 | t->sorted_vocab[i].str = t->vocab[i];
509 | t->sorted_vocab[i].id = i;
510 | }
511 | qsort(t->sorted_vocab, t->vocab_size, sizeof(TokenIndex), compare_tokens);
512 | }
513 |
514 | // create a temporary buffer that will store merge candidates of always two consecutive tokens
515 | // *2 for concat, +1 for null terminator +2 for UTF8 (in case max_token_length is 1)
516 | char* str_buffer = malloc((t->max_token_length*2 +1 +2) * sizeof(char));
517 | size_t str_len = 0;
518 |
519 | // start at 0 tokens
520 | *n_tokens = 0;
521 |
522 | // add optional BOS (=1) token, if desired
523 | if (bos) tokens[(*n_tokens)++] = 1;
524 |
525 | // add_dummy_prefix is true by default
526 | // so prepend a dummy prefix token to the input string, but only if text != ""
527 | // TODO: pretty sure this isn't correct in the general case but I don't have the
528 | // energy to read more of the sentencepiece code to figure out what it's doing
529 | if (text[0] != '\0') {
530 | int dummy_prefix = str_lookup(" ", t->sorted_vocab, t->vocab_size);
531 | tokens[(*n_tokens)++] = dummy_prefix;
532 | }
533 |
534 | // Okay UTF-8 time. This will get messy. Here is the reference from Wikipedia:
535 | // Code point ↔ UTF-8 conversion
536 | // First code point Last code point Byte 1 Byte 2 Byte 3 Byte 4
537 | // U+0000 U+007F 0xxxxxxx
538 | // U+0080 U+07FF 110xxxxx 10xxxxxx
539 | // U+0800 U+FFFF 1110xxxx 10xxxxxx 10xxxxxx
540 | // U+10000 U+10FFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
541 |
542 | // process the raw (UTF-8) byte sequence of the input string
543 | for (char *c = text; *c != '\0'; c++) {
544 |
545 | // reset buffer if the current byte is ASCII or a leading byte
546 | // 0xC0 is 11000000, so (*c & 0xC0) keeps the first 2 bits and zeros the rest
547 | // 0x80 is 10000000
548 | // in UTF-8, all continuation bytes start with "10" in first two bits
549 | // so in English this is: "if this byte is not a continuation byte"
550 | if ((*c & 0xC0) != 0x80) {
551 | // this byte must be either a leading byte (11...) or an ASCII char (0x...)
552 | // => reset our location, as we're starting a new UTF-8 codepoint
553 | str_len = 0;
554 | }
555 |
556 | // append the current byte to the buffer
557 | str_buffer[str_len++] = *c; // ++ is post-increment, incremented after this line
558 | str_buffer[str_len] = '\0';
559 |
560 | // while the next character is a continuation byte, continue appending
561 | // but if there are too many of them, just stop to avoid overruning str_buffer size.
562 | if ((*(c+1) & 0xC0) == 0x80 && str_len < 4) {
563 | continue;
564 | }
565 |
566 | // ok c+1 is not a continuation byte, so we've read in a full codepoint
567 | int id = str_lookup(str_buffer, t->sorted_vocab, t->vocab_size);
568 |
569 | if (id != -1) {
570 | // we found this codepoint in vocab, add it as a token
571 | tokens[(*n_tokens)++] = id;
572 | } else {
573 | // byte_fallback encoding: just encode each byte as a token
574 | // +3 is here because the first 3 vocab elements are , ,
575 | // so the individual bytes only start at index 3
576 | for (int i=0; i < str_len; i++) {
577 | tokens[(*n_tokens)++] = (unsigned char)str_buffer[i] + 3;
578 | }
579 | }
580 | str_len = 0; // protect against a sequence of stray UTF8 continuation bytes
581 | }
582 |
583 | // merge the best consecutive pair each iteration, according the scores in vocab_scores
584 | while (1) {
585 | float best_score = -1e10;
586 | int best_id = -1;
587 | int best_idx = -1;
588 |
589 | for (int i=0; i < (*n_tokens-1); i++) {
590 | // check if we can merge the pair (tokens[i], tokens[i+1])
591 | sprintf(str_buffer, "%s%s", t->vocab[tokens[i]], t->vocab[tokens[i+1]]);
592 | int id = str_lookup(str_buffer, t->sorted_vocab, t->vocab_size);
593 | if (id != -1 && t->vocab_scores[id] > best_score) {
594 | // this merge pair exists in vocab! record its score and position
595 | best_score = t->vocab_scores[id];
596 | best_id = id;
597 | best_idx = i;
598 | }
599 | }
600 |
601 | if (best_idx == -1) {
602 | break; // we couldn't find any more pairs to merge, so we're done
603 | }
604 |
605 | // merge the consecutive pair (best_idx, best_idx+1) into new token best_id
606 | tokens[best_idx] = best_id;
607 | // delete token at position best_idx+1, shift the entire sequence back 1
608 | for (int i = best_idx+1; i < (*n_tokens-1); i++) {
609 | tokens[i] = tokens[i+1];
610 | }
611 | (*n_tokens)--; // token length decreased
612 | }
613 |
614 | // add optional EOS (=2) token, if desired
615 | if (eos) tokens[(*n_tokens)++] = 2;
616 |
617 | free(str_buffer);
618 | }
619 |
620 | // ----------------------------------------------------------------------------
621 | // The Sampler, which takes logits and returns a sampled token
622 | // sampling can be done in a few ways: greedy argmax, sampling, top-p sampling
623 |
// A probability tagged with the token index it belongs to, so that the pairing
// survives sorting during top-p sampling.
typedef struct {
    float prob; // probability of the token
    int index;  // token index
} ProbIndex;
628 |
629 | typedef struct {
630 | int vocab_size;
631 | ProbIndex* probindex; // buffer used in top-p sampling
632 | float temperature;
633 | float topp;
634 | unsigned long long rng_state;
635 | } Sampler;
636 |
// Index of the largest entry of probabilities[0..n-1] (first one on ties).
// `n` must be at least 1.
int sample_argmax(float* probabilities, int n) {
    int best = 0;
    for (int candidate = 1; candidate < n; candidate++) {
        if (probabilities[candidate] > probabilities[best]) {
            best = candidate;
        }
    }
    return best;
}
649 |
// Draw an index from a discrete distribution (entries must sum to 1).
// `coin` is a random number in [0, 1), usually from random_f32().
int sample_mult(float* probabilities, int n, float coin) {
    float running = 0.0f; // cumulative probability up to and including `idx`
    int idx = 0;
    while (idx < n) {
        running += probabilities[idx];
        if (coin < running) {
            return idx;
        }
        idx++;
    }
    // rounding kept the total at or below coin: fall back to the last index
    return n - 1;
}
662 |
663 | int compare(const void* a, const void* b) {
664 | ProbIndex* a_ = (ProbIndex*) a;
665 | ProbIndex* b_ = (ProbIndex*) b;
666 | if (a_->prob > b_->prob) return -1;
667 | if (a_->prob < b_->prob) return 1;
668 | return 0;
669 | }
670 |
671 | int sample_topp(float* probabilities, int n, float topp, ProbIndex* probindex, float coin) {
672 | // top-p sampling (or "nucleus sampling") samples from the smallest set of
673 | // tokens that exceed probability topp. This way we never sample tokens that
674 | // have very low probabilities and are less likely to go "off the rails".
675 | // coin is a random number in [0, 1), usually from random_f32()
676 |
677 | int n0 = 0;
678 | // quicksort indices in descending order of probabilities
679 | // values smaller than (1 - topp) / (n - 1) cannot be part of the result
680 | // so for efficiency we crop these out as candidates before sorting
681 | const float cutoff = (1.0f - topp) / (n - 1);
682 | for (int i = 0; i < n; i++) {
683 | if (probabilities[i] >= cutoff) {
684 | probindex[n0].index = i;
685 | probindex[n0].prob = probabilities[i];
686 | n0++;
687 | }
688 | }
689 | qsort(probindex, n0, sizeof(ProbIndex), compare);
690 |
691 | // truncate the list where cumulative probability exceeds topp
692 | float cumulative_prob = 0.0f;
693 | int last_idx = n0 - 1; // in case of rounding errors consider all elements
694 | for (int i = 0; i < n0; i++) {
695 | cumulative_prob += probindex[i].prob;
696 | if (cumulative_prob > topp) {
697 | last_idx = i;
698 | break; // we've exceeded topp by including last_idx
699 | }
700 | }
701 |
702 | // sample from the truncated list
703 | float r = coin * cumulative_prob;
704 | float cdf = 0.0f;
705 | for (int i = 0; i <= last_idx; i++) {
706 | cdf += probindex[i].prob;
707 | if (r < cdf) {
708 | return probindex[i].index;
709 | }
710 | }
711 | return probindex[last_idx].index; // in case of rounding errors
712 | }
713 |
714 | void build_sampler(Sampler* sampler, int vocab_size, float temperature, float topp, unsigned long long rng_seed) {
715 | sampler->vocab_size = vocab_size;
716 | sampler->temperature = temperature;
717 | sampler->topp = topp;
718 | sampler->rng_state = rng_seed;
719 | // buffer only used with nucleus sampling; may not need but it's ~small
720 | sampler->probindex = malloc(sampler->vocab_size * sizeof(ProbIndex));
721 | }
722 |
723 | void free_sampler(Sampler* sampler) {
724 | free(sampler->probindex);
725 | }
726 |
// Advance the xorshift* generator in *state and return 32 random bits.
// See https://en.wikipedia.org/wiki/Xorshift#xorshift.2A
unsigned int random_u32(unsigned long long *state) {
    unsigned long long s = *state;
    s ^= s >> 12;
    s ^= s << 25;
    s ^= s >> 27;
    *state = s;
    // scramble with the multiplier and keep the upper half of the product
    return (unsigned int)((s * 0x2545F4914F6CDD1Dull) >> 32);
}
// Random float32 in [0, 1): the top 24 bits of a random_u32() draw, scaled by 2^-24.
float random_f32(unsigned long long *state) {
    const unsigned int top24 = random_u32(state) >> 8;
    return top24 / 16777216.0f;
}
737 |
738 | int sample(Sampler* sampler, float* logits) {
739 | // sample the token given the logits and some hyperparameters
740 | int next;
741 | if (sampler->temperature == 0.0f) {
742 | // greedy argmax sampling: take the token with the highest probability
743 | next = sample_argmax(logits, sampler->vocab_size);
744 | } else {
745 | // apply the temperature to the logits
746 | for (int q=0; qvocab_size; q++) { logits[q] /= sampler->temperature; }
747 | // apply softmax to the logits to get the probabilities for next token
748 | softmax(logits, sampler->vocab_size);
749 | // flip a (float) coin (this is our source of entropy for sampling)
750 | float coin = random_f32(&sampler->rng_state);
751 | // we sample from this distribution to get the next token
752 | if (sampler->topp <= 0 || sampler->topp >= 1) {
753 | // simply sample from the predicted probability distribution
754 | next = sample_mult(logits, sampler->vocab_size, coin);
755 | } else {
756 | // top-p (nucleus) sampling, clamping the least likely tokens to zero
757 | next = sample_topp(logits, sampler->vocab_size, sampler->topp, sampler->probindex, coin);
758 | }
759 | }
760 | return next;
761 | }
762 |
763 | // ----------------------------------------------------------------------------
764 | // utilities: time
765 |
766 | long time_in_ms() {
767 | // return time in milliseconds, for benchmarking the model speed
768 | struct timespec time;
769 | clock_gettime(CLOCK_REALTIME, &time);
770 | return time.tv_sec * 1000 + time.tv_nsec / 1000000;
771 | }
772 |
773 | // ----------------------------------------------------------------------------
774 | // generation loop
775 |
776 | void generate(Transformer *transformer, Tokenizer *tokenizer, Sampler *sampler, char *prompt, int steps) {
777 | char *empty_prompt = "";
778 | if (prompt == NULL) { prompt = empty_prompt; }
779 |
780 | // encode the (string) prompt into tokens sequence
781 | int num_prompt_tokens = 0;
782 | int* prompt_tokens = (int*)malloc((strlen(prompt)+3) * sizeof(int)); // +3 for '\0', ?BOS, ?EOS
783 | encode(tokenizer, prompt, 1, 0, prompt_tokens, &num_prompt_tokens);
784 | if (num_prompt_tokens < 1) {
785 | psvDebugScreenPrintf("something is wrong, expected at least 1 prompt token\n");
786 | sceKernelExitProcess(0);
787 | }
788 |
789 | // start the main loop
790 | long start = 0; // used to time our code, only initialized after first iteration
791 | int next; // will store the next token in the sequence
792 | int token = prompt_tokens[0]; // kick off with the first token in the prompt
793 | int pos = 0; // position in the sequence
794 |
795 | psvDebugScreenPrintf("\n%s", prompt);
796 |
797 | while (pos < steps) {
798 | // forward the transformer to get logits for the next token
799 | float* logits = forward(transformer, token, pos);
800 |
801 | // advance the state machine
802 | if (pos < num_prompt_tokens - 1) {
803 | // if we are still processing the input prompt, force the next prompt token
804 | next = prompt_tokens[pos + 1];
805 | } else {
806 | // otherwise sample the next token from the logits
807 | next = sample(sampler, logits);
808 |
809 | // print the token as string, decode it with the Tokenizer object
810 | char* piece = decode(tokenizer, token, next);
811 | safe_printf(piece);
812 | }
813 | pos++;
814 |
815 | // data-dependent terminating condition: the BOS (=1) token delimits sequences
816 | if (next == 1) { break; }
817 | token = next;
818 |
819 | // init the timer here because the first iteration can be slower
820 | if (start == 0) { start = time_in_ms(); }
821 | }
822 | psvDebugScreenPrintf("\n");
823 |
824 | // report achieved tok/s (pos-1 because the timer starts after first iteration)
825 | if (pos > 1) {
826 | long end = time_in_ms();
827 | psvDebugScreenPrintf(COLOR_YELLOW "\nachieved tok/s: %f\n" COLOR_RESET, (pos-1) / (double)(end-start)*1000);
828 | }
829 |
830 | free(prompt_tokens);
831 | }
832 |
// Prints `guide` as a prompt, then reads one line from stdin into `buffer`
// (at most bufsize-1 characters), without the trailing '\n'.
//
// If nothing can be read (EOF or read error), `buffer` is set to the empty
// string so the caller never sees stale or uninitialized contents.
// A NULL buffer or a bufsize of 0 only prints the guide.
void read_stdin(const char* guide, char* buffer, size_t bufsize) {
    printf("%s", guide);
    if (buffer == NULL || bufsize == 0) {
        return;
    }
    if (fgets(buffer, (int)bufsize, stdin) == NULL) {
        buffer[0] = '\0';
        return;
    }
    // fgets keeps the newline when it fits; cut the string there
    buffer[strcspn(buffer, "\n")] = '\0';
}
843 |
844 | void display_manage_models_menu(ModelSuite* suites, int count) {
845 | SceCtrlData pad;
846 | // wait for button release
847 | while(sceCtrlPeekBufferPositive(0, &pad, 1), pad.buttons) { sceKernelDelayThread(16 * 1000); }
848 | uint32_t old_buttons = 0;
849 | int selection = 0;
850 | int x = 0, y = 0;
851 |
852 | while (1) {
853 | psvDebugScreenClear(0xFF000000);
854 | psvDebugScreenSetCoordsXY(&x, &y);
855 | psvDebugScreenPrintf(COLOR_CYAN "Manage Models:\n\n" COLOR_RESET);
856 |
857 | for (int i = 0; i < count; i++) {
858 | FILE* model_file = fopen(suites[i].model_path, "rb");
859 | int is_downloaded = 0;
860 | if(model_file){
861 | fclose(model_file);
862 | FILE* tokenizer_file = fopen(suites[i].tokenizer_path, "rb");
863 | if(tokenizer_file){
864 | fclose(tokenizer_file);
865 | is_downloaded = 1;
866 | }
867 | }
868 |
869 | if (i == selection) {
870 | psvDebugScreenPrintf("-> ");
871 | } else {
872 | psvDebugScreenPrintf(" ");
873 | }
874 |
875 | if (is_downloaded) {
876 | psvDebugScreenPrintf(COLOR_GREEN "%s (Downloaded)\n" COLOR_RESET, suites[i].name);
877 | } else {
878 | psvDebugScreenPrintf(COLOR_YELLOW "%s (Not Downloaded)\n" COLOR_RESET, suites[i].name);
879 | }
880 | }
881 | psvDebugScreenPrintf(COLOR_YELLOW "\nPress X to manage, O to go back.\n" COLOR_RESET);
882 |
883 | while(1) {
884 | sceCtrlPeekBufferPositive(0, &pad, 1);
885 | uint32_t pressed = pad.buttons & ~old_buttons;
886 | old_buttons = pad.buttons;
887 |
888 | if (pressed & SCE_CTRL_UP) {
889 | selection = (selection > 0) ? selection - 1 : count - 1;
890 | break;
891 | }
892 | if (pressed & SCE_CTRL_DOWN) {
893 | selection = (selection < count - 1) ? selection + 1 : 0;
894 | break;
895 | }
896 | if (pressed & SCE_CTRL_CIRCLE) {
897 | return;
898 | }
899 | if (pressed & SCE_CTRL_CROSS) {
900 | FILE* model_file = fopen(suites[selection].model_path, "rb");
901 | int is_downloaded = 0;
902 | if(model_file){
903 | fclose(model_file);
904 | FILE* tokenizer_file = fopen(suites[selection].tokenizer_path, "rb");
905 | if(tokenizer_file){
906 | fclose(tokenizer_file);
907 | is_downloaded = 1;
908 | }
909 | }
910 |
911 | if (is_downloaded) {
912 | psvDebugScreenClear(0xFF000000);
913 | psvDebugScreenSetCoordsXY(&x, &y);
914 | psvDebugScreenPrintf(COLOR_RED "Delete %s and its tokenizer?\n\n" COLOR_RESET, suites[selection].name);
915 | psvDebugScreenPrintf(COLOR_YELLOW "Press X to confirm, O to cancel.\n" COLOR_RESET);
916 |
917 | while(1) {
918 | uint32_t confirm_pressed = 0;
919 | sceCtrlPeekBufferPositive(0, &pad, 1);
920 | confirm_pressed = pad.buttons & ~old_buttons;
921 | old_buttons = pad.buttons;
922 |
923 | if (confirm_pressed & SCE_CTRL_CIRCLE) {
924 | break;
925 | }
926 | if (confirm_pressed & SCE_CTRL_CROSS) {
927 | sceIoRemove(suites[selection].model_path);
928 | sceIoRemove(suites[selection].tokenizer_path);
929 | psvDebugScreenClear(0xFF000000);
930 | psvDebugScreenSetCoordsXY(&x, &y);
931 | psvDebugScreenPrintf(COLOR_GREEN "Model deleted.\n" COLOR_RESET);
932 | sceKernelDelayThread(1 * 1000 * 1000);
933 | break;
934 | }
935 | sceKernelDelayThread(16 * 1000);
936 | }
937 | } else {
938 | psvDebugScreenClear(0xFF000000);
939 | psvDebugScreenSetCoordsXY(&x, &y);
940 | psvDebugScreenPrintf(COLOR_CYAN "Download %s and its tokenizer?\n\n" COLOR_RESET, suites[selection].name);
941 | psvDebugScreenPrintf(COLOR_YELLOW "Press X to confirm, O to cancel.\n" COLOR_RESET);
942 |
943 | while(1) {
944 | uint32_t confirm_pressed = 0;
945 | sceCtrlPeekBufferPositive(0, &pad, 1);
946 | confirm_pressed = pad.buttons & ~old_buttons;
947 | old_buttons = pad.buttons;
948 |
949 | if (confirm_pressed & SCE_CTRL_CIRCLE) {
950 | break;
951 | }
952 | if (confirm_pressed & SCE_CTRL_CROSS) {
953 | psvDebugScreenClear(0xFF000000);
954 | psvDebugScreenPrintf(COLOR_CYAN "Downloading %s...\n" COLOR_RESET, suites[selection].name);
955 | download_file(suites[selection].model_url, suites[selection].model_path);
956 |
957 | psvDebugScreenPrintf(COLOR_CYAN "Downloading tokenizer %s...\n" COLOR_RESET, suites[selection].tokenizer_name);
958 | download_file(suites[selection].tokenizer_url, suites[selection].tokenizer_path);
959 |
960 | psvDebugScreenPrintf(COLOR_GREEN "\nDownloads finished. Press X to continue.\n" COLOR_RESET);
961 | while(1){
962 | uint32_t continue_pressed = 0;
963 | sceCtrlPeekBufferPositive(0, &pad, 1);
964 | continue_pressed = pad.buttons & ~old_buttons;
965 | old_buttons = pad.buttons;
966 | if(continue_pressed & SCE_CTRL_CROSS) break;
967 | sceKernelDelayThread(16 * 1000);
968 | }
969 | break;
970 | }
971 | sceKernelDelayThread(16 * 1000);
972 | }
973 | }
974 | break;
975 | }
976 | sceKernelDelayThread(16 * 1000);
977 | }
978 | }
979 | }
980 |
981 | void netInit() {
982 | sceSysmoduleLoadModule(SCE_SYSMODULE_NET);
983 | SceNetInitParam netInitParam;
984 | int size = 4 * 1024 * 1024;
985 | netInitParam.memory = malloc(size);
986 | netInitParam.size = size;
987 | netInitParam.flags = 0;
988 | sceNetInit(&netInitParam);
989 | sceNetCtlInit();
990 | }
991 |
992 | void httpInit() {
993 | sceSysmoduleLoadModule(SCE_SYSMODULE_HTTP);
994 | sceHttpInit(4 * 1024 * 1024);
995 | }
996 |
// Write callback installed via CURLOPT_WRITEFUNCTION in download_file().
// `stream` points at the int file descriptor to write to; the size * nmemb
// bytes at `ptr` are passed to sceIoWrite() and its result is returned.
// NOTE(review): the sceIoWrite() result is converted to size_t as-is, so an
// error return presumably shows up as a value that differs from the requested
// byte count -- confirm against the sceIoWrite() declaration.
static size_t write_data_to_disk(void *ptr, size_t size, size_t nmemb, void *stream) {
    const int *fd_ptr = stream;
    size_t byte_count = size * nmemb;
    return sceIoWrite(*fd_ptr, ptr, byte_count);
}
1000 |
1001 | int progress_callback(void *clientp, curl_off_t dltotal, curl_off_t dlnow, curl_off_t ultotal, curl_off_t ulnow) {
1002 | // Only show progress for transfers over 10KB to filter out small HTTP responses (e.g. redirects)
1003 | if (dltotal > 10000) {
1004 | float percent = (float)dlnow / (float)dltotal * 100.0f;
1005 | psvDebugScreenPrintf("\rDownloading... %6.2f%%", percent);
1006 | }
1007 | return 0;
1008 | }
1009 |
1010 | void download_file(const char* url, const char* filepath) {
1011 | CURL *curl;
1012 | CURLcode res;
1013 | int file_fd = sceIoOpen(filepath, SCE_O_WRONLY | SCE_O_CREAT | SCE_O_TRUNC, 0777);
1014 |
1015 | if (file_fd < 0) {
1016 | psvDebugScreenPrintf(COLOR_RED "\nError opening file for writing: %s\n" COLOR_RESET, filepath);
1017 | return;
1018 | }
1019 |
1020 | curl = curl_easy_init();
1021 | if (curl) {
1022 | curl_easy_setopt(curl, CURLOPT_URL, url);
1023 | curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_data_to_disk);
1024 | curl_easy_setopt(curl, CURLOPT_WRITEDATA, &file_fd);
1025 | curl_easy_setopt(curl, CURLOPT_XFERINFOFUNCTION, progress_callback);
1026 | curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L);
1027 | curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
1028 | curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);
1029 | curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0L);
1030 |
1031 | res = curl_easy_perform(curl);
1032 | psvDebugScreenPrintf("\n");
1033 |
1034 | if (res != CURLE_OK) {
1035 | psvDebugScreenPrintf(COLOR_RED "Download failed: %s\n" COLOR_RESET, curl_easy_strerror(res));
1036 | } else {
1037 | psvDebugScreenPrintf(COLOR_GREEN "Download successful!\n" COLOR_RESET);
1038 | }
1039 | curl_easy_cleanup(curl);
1040 | }
1041 | sceIoClose(file_fd);
1042 | }
1043 |
1044 | void display_download_menu() {
1045 | SceCtrlData pad;
1046 | // wait for button release
1047 | while(sceCtrlPeekBufferPositive(0, &pad, 1), pad.buttons) { sceKernelDelayThread(16 * 1000); }
1048 | uint32_t old_buttons = 0;
1049 | int selection = 0;
1050 | int x = 0, y = 0;
1051 | const int num_options = 3;
1052 | const char* options[] = {"Download 15M Model", "Download 260K Model", "Back"};
1053 |
1054 | ModelSuite suites[] = {
1055 | {"ux0:data/stories15M.bin", "ux0:data/tokenizer.bin", "stories15M", "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.bin", "https://raw.githubusercontent.com/karpathy/llama2.c/master/tokenizer.bin", "tokenizer.bin"},
1056 | {"ux0:data/stories260K.bin", "ux0:data/tok512.bin", "stories260K", "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/stories260K.bin", "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/tok512.bin", "tok512.bin"}
1057 | };
1058 |
1059 | while(1) {
1060 | psvDebugScreenClear(0xFF000000);
1061 | psvDebugScreenSetCoordsXY(&x, &y);
1062 | psvDebugScreenPrintf(COLOR_CYAN "No models found. Would you like to download them?\n\n" COLOR_RESET);
1063 | for(int i=0; i < num_options; ++i) {
1064 | if(i == selection) {
1065 | psvDebugScreenPrintf("-> " COLOR_GREEN "%s\n" COLOR_RESET, options[i]);
1066 | } else {
1067 | psvDebugScreenPrintf(" %s\n", options[i]);
1068 | }
1069 | }
1070 | psvDebugScreenPrintf(COLOR_YELLOW "\nPress X to confirm, UP/DOWN to navigate.\n" COLOR_RESET);
1071 |
1072 | while(1) {
1073 | sceCtrlPeekBufferPositive(0, &pad, 1);
1074 | uint32_t pressed = pad.buttons & ~old_buttons;
1075 | old_buttons = pad.buttons;
1076 |
1077 | if(pressed & SCE_CTRL_UP) {
1078 | selection = (selection > 0) ? selection - 1 : num_options - 1;
1079 | break;
1080 | }
1081 | if(pressed & SCE_CTRL_DOWN) {
1082 | selection = (selection < num_options - 1) ? selection + 1 : 0;
1083 | break;
1084 | }
1085 | if(pressed & SCE_CTRL_CROSS) {
1086 | if(selection == 2) {
1087 | psvDebugScreenPrintf(COLOR_RED "No models to load. Exiting.\n" COLOR_RESET);
1088 | sceKernelDelayThread(2 * 1000 * 1000);
1089 | sceKernelExitProcess(0);
1090 | }
1091 |
1092 | psvDebugScreenClear(0xFF000000);
1093 | psvDebugScreenPrintf(COLOR_CYAN "Downloading %s...\n" COLOR_RESET, suites[selection].name);
1094 | download_file(suites[selection].model_url, suites[selection].model_path);
1095 |
1096 | psvDebugScreenPrintf(COLOR_CYAN "Downloading tokenizer %s...\n" COLOR_RESET, suites[selection].tokenizer_name);
1097 | download_file(suites[selection].tokenizer_url, suites[selection].tokenizer_path);
1098 |
1099 | psvDebugScreenPrintf(COLOR_GREEN "\nDownloads finished. Press X to continue.\n" COLOR_RESET);
1100 | while(1){
1101 | uint32_t confirm_pressed = 0;
1102 | sceCtrlPeekBufferPositive(0, &pad, 1);
1103 | confirm_pressed = pad.buttons & ~old_buttons;
1104 | old_buttons = pad.buttons;
1105 | if(confirm_pressed & SCE_CTRL_CROSS) break;
1106 | sceKernelDelayThread(16 * 1000);
1107 | }
1108 | return;
1109 | }
1110 | sceKernelDelayThread(16*1000);
1111 | }
1112 | }
1113 | }
1114 |
1115 | void display_model_selection_menu(ModelSuite* suites, int count, int* selected_index, int* menu_action) {
1116 | SceCtrlData pad;
1117 | // wait for button release
1118 | while(sceCtrlPeekBufferPositive(0, &pad, 1), pad.buttons) { sceKernelDelayThread(16 * 1000); }
1119 | uint32_t old_buttons = 0;
1120 | int current_selection = 0;
1121 | int x = 0, y = 0;
1122 | const int num_options = count + 1;
1123 |
1124 | while (1) {
1125 | psvDebugScreenClear(0xFF000000);
1126 | psvDebugScreenSetCoordsXY(&x, &y);
1127 | psvDebugScreenPrintf(COLOR_CYAN "Select a model:\n\n" COLOR_RESET);
1128 |
1129 | for (int i = 0; i < count; i++) {
1130 | if (i == current_selection) {
1131 | psvDebugScreenPrintf("-> " COLOR_GREEN "%s\n" COLOR_RESET, suites[i].name);
1132 | } else {
1133 | psvDebugScreenPrintf(" %s\n", suites[i].name);
1134 | }
1135 | }
1136 |
1137 | if (current_selection == count) {
1138 | psvDebugScreenPrintf("-> " COLOR_YELLOW "%s\n" COLOR_RESET, "Manage local models...");
1139 | } else {
1140 | psvDebugScreenPrintf(" %s\n", "Manage local models...");
1141 | }
1142 |
1143 | psvDebugScreenPrintf(COLOR_YELLOW "\nPress X to confirm, UP/DOWN to navigate.\n" COLOR_RESET);
1144 |
1145 | while (1) {
1146 | sceCtrlPeekBufferPositive(0, &pad, 1);
1147 | uint32_t pressed_buttons = pad.buttons & ~old_buttons;
1148 | old_buttons = pad.buttons;
1149 |
1150 | if (pressed_buttons & SCE_CTRL_DOWN) {
1151 | current_selection = (current_selection < num_options - 1) ? current_selection + 1 : 0;
1152 | break;
1153 | }
1154 | if (pressed_buttons & SCE_CTRL_UP) {
1155 | current_selection = (current_selection > 0) ? current_selection - 1 : num_options - 1;
1156 | break;
1157 | }
1158 | if (pressed_buttons & SCE_CTRL_CROSS) {
1159 | if(current_selection == count) {
1160 | *menu_action = 1; // Manage models
1161 | } else {
1162 | *menu_action = 0; // Load model
1163 | *selected_index = current_selection;
1164 | }
1165 | return;
1166 | }
1167 | sceKernelDelayThread(16 * 1000);
1168 | }
1169 | }
1170 | }
1171 |
1172 | int main(int argc, char *argv[]) {
1173 | psvDebugScreenInit();
1174 | PsvDebugScreenFont* current_font = psvDebugScreenGetFont();
1175 | PsvDebugScreenFont* scaled_font = psvDebugScreenScaleFont2x(current_font);
1176 | psvDebugScreenSetFont(scaled_font);
1177 | psvDebugScreenPrintf(COLOR_CYAN "Application started.\n" COLOR_RESET);
1178 | netInit();
1179 | httpInit();
1180 |
1181 | while (1) {
1182 | ModelSuite all_suites[] = {
1183 | {"ux0:data/stories15M.bin", "ux0:data/tokenizer.bin", "stories15M", "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.bin", "https://raw.githubusercontent.com/karpathy/llama2.c/master/tokenizer.bin", "tokenizer.bin"},
1184 | {"ux0:data/stories260K.bin", "ux0:data/tok512.bin", "stories260K", "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/stories260K.bin", "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/tok512.bin", "tok512.bin"}
1185 | };
1186 | ModelSuite found_suites[4];
1187 | int found_count = 0;
1188 |
1189 | for (int i = 0; i < sizeof(all_suites) / sizeof(ModelSuite); i++) {
1190 | FILE* model_file = fopen(all_suites[i].model_path, "rb");
1191 | if (model_file) {
1192 | fclose(model_file);
1193 | FILE* tokenizer_file = fopen(all_suites[i].tokenizer_path, "rb");
1194 | if (tokenizer_file) {
1195 | fclose(tokenizer_file);
1196 | found_suites[found_count++] = all_suites[i];
1197 | }
1198 | }
1199 | }
1200 |
1201 | char* checkpoint_path = NULL;
1202 | char* tokenizer_path = NULL;
1203 |
1204 | if (found_count == 0) {
1205 | display_download_menu();
1206 | continue;
1207 | }
1208 |
1209 | int menu_action = 0;
1210 | int selected_index = 0;
1211 | display_model_selection_menu(found_suites, found_count, &selected_index, &menu_action);
1212 |
1213 | if (menu_action == 1) {
1214 | display_manage_models_menu(all_suites, sizeof(all_suites) / sizeof(ModelSuite));
1215 | continue;
1216 | }
1217 |
1218 | checkpoint_path = found_suites[selected_index].model_path;
1219 | tokenizer_path = found_suites[selected_index].tokenizer_path;
1220 |
1221 | float temperature = 1.0f;
1222 | float topp = 0.9f;
1223 | int steps = 0;
1224 | char *prompt = "Once upon a time";
1225 | unsigned long long rng_seed = (unsigned int)time(NULL);
1226 |
1227 | psvDebugScreenClear(0xFF000000);
1228 | int x = 0, y = 0;
1229 | psvDebugScreenSetCoordsXY(&x, &y);
1230 | Transformer transformer;
1231 | build_transformer(&transformer, checkpoint_path);
1232 | if (steps == 0 || steps > transformer.config.seq_len) {
1233 | steps = transformer.config.seq_len;
1234 | }
1235 |
1236 | Tokenizer tokenizer;
1237 | build_tokenizer(&tokenizer, tokenizer_path, transformer.config.vocab_size);
1238 | Sampler sampler;
1239 | build_sampler(&sampler, transformer.config.vocab_size, temperature, topp, rng_seed);
1240 | psvDebugScreenClear(0xFF000000);
1241 | psvDebugScreenSetCoordsXY(&x, &y);
1242 |
1243 | int main_loop_action = 0;
1244 |
1245 | while(1){
1246 | psvDebugScreenPrintf(COLOR_YELLOW "\nPress the X button to start generation.\n" COLOR_RESET);
1247 | uint32_t old_buttons = 0;
1248 | while(1){
1249 | SceCtrlData pad;
1250 | sceCtrlPeekBufferPositive(0, &pad, 1);
1251 | uint32_t pressed = pad.buttons & ~old_buttons;
1252 | old_buttons = pad.buttons;
1253 | if(pressed & SCE_CTRL_CROSS)
1254 | break;
1255 | sceKernelDelayThread(16 * 1000);
1256 | }
1257 |
1258 | psvDebugScreenClear(0xFF000000);
1259 | psvDebugScreenSetCoordsXY(&x, &y);
1260 | generate(&transformer, &tokenizer, &sampler, prompt, steps);
1261 | psvDebugScreenPrintf(COLOR_GREEN "Text generation finished.\n" COLOR_RESET);
1262 |
1263 | psvDebugScreenPrintf(COLOR_YELLOW "\nPress SQUARE for a new story\n" COLOR_RESET);
1264 | psvDebugScreenPrintf(COLOR_YELLOW "Press CIRCLE to change model\n" COLOR_RESET);
1265 | psvDebugScreenPrintf(COLOR_YELLOW "Press X to exit.\n" COLOR_RESET);
1266 |
1267 | int story_loop_action = 0;
1268 | old_buttons = 0;
1269 | while(1) {
1270 | SceCtrlData pad;
1271 | sceCtrlPeekBufferPositive(0, &pad, 1);
1272 | uint32_t pressed_buttons = pad.buttons & ~old_buttons;
1273 | old_buttons = pad.buttons;
1274 |
1275 | if (pressed_buttons & SCE_CTRL_SQUARE) {
1276 | story_loop_action = 0;
1277 | break;
1278 | }
1279 | if (pressed_buttons & SCE_CTRL_CIRCLE) {
1280 | story_loop_action = 1;
1281 | break;
1282 | }
1283 | if (pressed_buttons & SCE_CTRL_CROSS) {
1284 | story_loop_action = 2;
1285 | break;
1286 | }
1287 | sceKernelDelayThread(16 * 1000);
1288 | }
1289 |
1290 | if (story_loop_action == 0) {
1291 | psvDebugScreenClear(0xFF000000);
1292 | psvDebugScreenSetCoordsXY(&x, &y);
1293 | continue;
1294 | } else {
1295 | if (story_loop_action == 2) main_loop_action = 1;
1296 | break;
1297 | }
1298 | }
1299 |
1300 | free_sampler(&sampler);
1301 | free_tokenizer(&tokenizer);
1302 | free_transformer(&transformer);
1303 |
1304 | if (main_loop_action == 1) {
1305 | break;
1306 | }
1307 | }
1308 |
1309 | psvDebugScreenPrintf(COLOR_CYAN "\nCleanup finished. Exiting.\n" COLOR_RESET);
1310 | sceKernelDelayThread(2 * 1000 * 1000);
1311 | sceKernelExitProcess(0);
1312 | return 0;
1313 | }
1314 |
--------------------------------------------------------------------------------