├── .travis.yml
├── M5Stack-Raytracer.ino
├── README.md
├── geometry.h
├── tiny_jpeg_encoder.h
└── tinyraytracer.h


/.travis.yml:
--------------------------------------------------------------------------------
  1 | #
  2 | #
  3 | # OVERRIDES: 
  4 | # env:
  5 | #  global:
  6 | #    - IDE_VERSION=$IDE_VERSION
  7 | #    - PLATFORM=$PLATFORM
  8 | #    - SDAPP_FOLDER=$PWD/examples/M5Stack-SD-Menu/SD-Apps
  9 | #  matrix:
 10 | #    - EXAMPLE="M5Stack-SD-Menu" BOARD="esp32:esp32:m5stack-core-esp32:FlashFreq=80" ARCHIVE_ZIP="SD-Apps-Folder.zip"
 11 | #    - EXAMPLE="M5Stack-SD-Menu" BOARD="esp32:esp32:odroid_esp32:FlashFreq=80" ARCHIVE_ZIP="SD-Apps-Folder-odroid.zip"
 12 | #
 13 | # Generic build image; the Arduino IDE and the esp32:esp32 package are installed by the install phase below.
 14 | language: generic
 15 | env:
 16 |   global:
 17 |     - IDE_VERSION=1.8.9
 18 |     - REPO_NAME=M5Stack-SD-Updater  # NOTE(review): only referenced by commented-out before_deploy steps in this file
 19 |     - SDAPP_FOLDER=$PWD/examples/M5Stack-SD-Menu/SD-Apps  # only referenced by commented-out steps in this file
 20 |     - TRAVIS_TAG="v0.4.1"  # only referenced by the commented-out tagging/deploy steps in this file
 21 |     - M5_SD_BUILD_DIR=$TRAVIS_BUILD_DIR/build/SD-Content  # only referenced by commented-out steps in this file
 22 |     - APPLICATION_FOLDER="${HOME}/arduino-ide"  # passed to set_application_folder in the install phase
 23 |     - SKETCHBOOK_FOLDER="${HOME}/arduino-sketchbook"  # passed to set_sketchbook_folder in the install phase
 24 |     
 25 |     #- WORK_DIR: ${{ '/home/runner/work' }}
 26 |     #- WORK_SPACE: ${{ github.workspace }}
 27 |     #- M5_SD_BUILD_DIR: ${{ '/home/runner/work/build/SD-Content' }}
 28 |     #- M5_BUILD_DIR: ${{ '/home/runner/work/build/SD-Content' }}
 29 |     #- M5_BURNER_DIR: ${{ '/home/runner/work/build/M5Burner' }}
 30 |     #- SD_UPDATER_BRANCH: ${{ 'master' }} # for the Sd-Updater library (e.g. use 'unstable' for pre-alpha channel)
 31 |     #- M5_CORE_URL: ${{ 'https://github.com/tobozo/ESP32-Chimera-Core' }}
 32 |     #- GITHUB_HOOK_URL: ${{ secrets.GITHUB_HOOK_URL }}
 33 |     #- APP_ROOTURL: ${{ 'https://phpsecu.re/m5stack' }} # https://registry.site/root-folder
 34 |     #- APP_PATH:    ${{ 'apps-awaiting/tobozo/M5Stack-Raytracer.json' }} # /path/to/author/project.json
 35 |     #- GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
 36 |     #- GITHUB_HOOK_SECRET: ${{ secrets.GITHUB_HOOK_SECRET }}
 37 |     #- REMOTE_APP_SLUG: ${{ 'tobozo_M5Stack-Raytracer' }}
 38 |     #- REMOTE_REPO_SLUG: ${{ 'tobozo/M5Stack-Raytracer' }}
 39 |     
 40 |   matrix:  # NOTE(review): no live step in this file reads EXAMPLE, BOARD or ARCHIVE_ZIP (the script phase hard-codes sketch and board), so both entries presumably run the same build — confirm before trimming
 41 |     - EXAMPLE="M5Stack-SD-Menu" BOARD="esp32:esp32:m5stack-core-esp32:FlashFreq=80" ARCHIVE_ZIP="SD-Apps-Folder.zip"
 42 |     - EXAMPLE="M5Stack-SD-Menu" BOARD="esp32:esp32:odroid_esp32:FlashFreq=80" ARCHIVE_ZIP="SD-Apps-Folder-odroid.zip"  
 43 |     # - EXAMPLE="M5Stack-SDLoader-Snippet" BOARD="espressif:esp32:m5stack-core-esp32:FlashFreq=80"
 44 | before_install:  # Fetch per1234/arduino-ci-script, switch to its most recent tag and load it into the build shell
 45 |   #- $SDAPP_FOLDER/install_arduino.sh
 46 |   #- cd $TRAVIS_BUILD_DIR ;
 47 |   - git clone https://github.com/per1234/arduino-ci-script.git "${HOME}/scripts/arduino-ci-script"
 48 |   - cd "${HOME}/scripts/arduino-ci-script"
 49 |   # Get new tags from the remote
 50 |   - git fetch --tags
 51 |   # Checkout the latest tag
 52 |   - git checkout $(git describe --tags `git rev-list --tags --max-count=1`)
 53 |   - source "${HOME}/scripts/arduino-ci-script/arduino-ci-script.sh"  # presumably defines the set_*/install_*/build_sketch commands used by install and script — confirm
 54 |   #- git clone https://github.com/tobozo/M5Stack-App-Registry.git "${HOME}/scripts/M5Stack-App-Registry"
 55 |   #- source "${HOME}/scripts/M5Stack-App-Registry/tools/common.sh"
 56 |   
 57 | install:  # Set up the IDE folders, the IDE versions, the two tobozo libraries and the esp32:esp32 board package
 58 |   - gem install git.io
 59 |   - pip install wheel pyserial
 60 |   - set_application_folder "$APPLICATION_FOLDER"
 61 |   - set_sketchbook_folder "$SKETCHBOOK_FOLDER"
 62 |   - install_ide '("1.8.0" "1.8.9" "newest")'  # the script phase builds against the "oldest".."newest" range — presumably taken from this list; confirm
 63 |   - install_library "https://github.com/tobozo/M5Stack-SD-Updater.git"
 64 |   - install_library "https://github.com/tobozo/ESP32-Chimera-Core.git"
 65 |   #- source "${HOME}/scripts/M5Stack-App-Registry/tools/common.sh"
 66 |   - install_package "esp32:esp32" "https://dl.espressif.com/dl/package_esp32_index.json" #  # esp32:esp32:m5stack-core-esp32
 67 |   - export arduino_installed=1  # NOTE(review): not read by any live step in this file
 68 |   #- sudo apt install rename imagemagick
 69 |   #- git submodule update --init --recursive
 70 |   #- cd $SDAPP_FOLDER
 71 |   #- git submodule foreach --recursive git pull origin master
 72 |   #- cd $TRAVIS_BUILD_DIR;
 73 | 
 74 |   #- mkdir -p ~/Arduino/libraries
 75 |   #- ln -s $PWD ~/Arduino/libraries/.  
 76 |   #- echo "Installing extra libraries"
 77 |   #- cd $SDAPP_FOLDER
 78 |   #- ./get-deps.sh
 79 | 
 80 | script:  # Compile the sketch for the m5stack-core-esp32 board; the "false", "oldest", "newest" arguments follow arduino-ci-script's build_sketch — presumably allow-failure flag and IDE range; confirm
 81 |   - build_sketch "${TRAVIS_BUILD_DIR}/M5Stack-Raytracer.ino" "esp32:esp32:m5stack-core-esp32:FlashFreq=80,UploadSpeed=921600" "false" "oldest" "newest"
 82 |   
 83 |   #- export REMOTE_APP_URL=$APP_ROOTURL/$APP_PATH
 84 |   #- source $WORK_SPACE/tools/common.sh
 85 |   #- get_remote_app $REMOTE_APP_URL
 86 |   #- bash $WORK_SPACE/tools/deploy.sh -a$GITHUB_TOKEN -s$GITHUB_REPOSITORY -d$M5_BUILD_DIR
 87 | 
 88 |   #- cd $TRAVIS_BUILD_DIR;
 89 |   #- arduino --pref "compiler.warning_level=none" --save-prefs
 90 |   #- arduino --pref "build.warn_data_percentage=75" --save-prefs
 91 |   #- arduino --pref "boardsmanager.additional.urls=https://dl.espressif.com/dl/package_esp32_index.json" --save-prefs
 92 |   #- arduino --install-boards esp32:esp32 &>/dev/null
 93 |   #- arduino --board $BOARD --save-prefs
 94 |   #- arduino --preserve-temp-files --verbose-build --verify $PWD/examples/$EXAMPLE/$EXAMPLE.ino &>/dev/null
 95 |   #- find /tmp -name \*.partitions.bin -exec rm {} \; #
 96 |   #- find /tmp -name \*.bin -exec mv {} $M5_SD_BUILD_DIR/TobozoLauncher.bin \; #
 97 |   #- cp $M5_SD_BUILD_DIR/TobozoLauncher.bin $M5_SD_BUILD_DIR/menu.bin
 98 |   #- echo "Fake Binary" >> $M5_SD_BUILD_DIR/Downloader.bin
 99 |   #- echo "Main APPs Compilation successful, now compiling deps"
100 |   #- cd $SDAPP_FOLDER
101 |   #- ./gen-apps.sh
102 |   #- echo "Fetching precompiled projects"
103 |   #- ./get-precompiled.sh
104 |   #- ls $M5_SD_BUILD_DIR/ -la
105 |   #- sleep 15 # give some time to the logs to come up  
106 | 
107 | before_deploy:  # every step below is commented out, so this key currently has no value
108 |   # Set up git user name and tag this commit
109 |   #- cd $PWD
110 |   
111 |   #if ! [[ $TRAVIS_TAG ]]; then
112 |   #  git config --global user.email "travis@travis-ci.org"
113 |   #  git config --global user.name "Travis CI"
114 |   #  git tag ${TRAVIS_TAG}
115 |   #fi
116 | 
117 |   #- cd /home/travis/build/tobozo/
118 |   #- echo "#define M5_SD_UPDATER_VERSION F(\"${TRAVIS_TAG}\")" > $REPO_NAME/src/gitTagVersion.h
119 |   #- rm -Rf $REPO_NAME/examples/M5Stack-SD-Menu/SD-Apps
120 |   #- rm -Rf $REPO_NAME/examples/M5Stack-SD-Menu/SD-Content
121 |   #- zip -r $TRAVIS_BUILD_DIR/$REPO_NAME.zip $REPO_NAME -x *.git*
122 |   #- cd $M5_SD_BUILD_DIR 
123 |   #- zip -r $TRAVIS_BUILD_DIR/$ARCHIVE_ZIP ./
124 |   #- cd $TRAVIS_BUILD_DIR
125 | 
126 |   # - export BODY=$(cat CHANGELOG.md) # boo! Travis doesn't like multiline body
127 |   
128 | #deploy:
129 | #  provider: releases
130 | #  api_key: $GH_TOKEN
131 | #  overwrite: true
132 | #  skip_cleanup: true
133 | #  target_commitish: $TRAVIS_COMMIT
134 | #  tag_name: $TRAVIS_TAG
135 | #  name: SD-Extras-autorelease $TRAVIS_TAG
136 | #  body: Automated release from Travis CI with added SD Apps binary files from Arduino compilation 
137 | #  draft: true
138 | #  file_glob: true
139 | #  file: 
140 | #    - $TRAVIS_BUILD_DIR/$ARCHIVE_ZIP
141 | #  #  - $TRAVIS_BUILD_DIR/$REPO_NAME.zip
142 | #  #file: $TRAVIS_BUILD_DIR/SD-Apps-Folder.zip
143 | #  on:
144 | #    tags: true
145 | #    branch: master
146 | 
147 | notifications:
148 |   email:
149 |     on_success: never
150 |     on_failure: always
151 |   webhooks:
152 |     urls: []  # explicit empty list: the only URL below is commented out, which left this key null
153 |       #- https://webhooks.gitter.im/e/2d1ffb10865e412333a9
154 |     on_success: change  # options: [always|never|change] default: always
155 |     on_failure: always  # options: [always|never|change] default: always
156 |     on_start: never     # options: [always|never|change] default: always
157 | 
158 |     
159 |     
160 | 


--------------------------------------------------------------------------------
/M5Stack-Raytracer.ino:
--------------------------------------------------------------------------------
  1 | 
  2 | bool hasPsram = false; // set to true in setup() when psramInit() succeeds; declared ahead of the includes because tiny_jpeg_encoder.h reads it
  3 | 
  4 | #include "FS.h"
  5 | #include "SD.h"
  6 | #include "tinyraytracer.h" // a modified version of https://github.com/ssloy/tinyraytracer
  7 | #include "tiny_jpeg_encoder.h" // a modified version of https://github.com/serge-rgb/TinyJPEG
  8 | 
  9 | #include <M5Stack.h>
 10 | #include <M5StackUpdater.h>   // https://github.com/tobozo/M5Stack-SD-Updater/
 11 | 
 12 | 
 13 | 
 14 | // Sphere centre: a rest position (initial*) plus the position handed to the renderer (x, y, z).
 15 | struct point {
 16 |   // Rest position; the only members given by the positional initialisers of the globals below.
 17 |   float initialx, initialy, initialz;
 18 |   // Position read by raytrace(); loop() writes it before rendering.
 19 |   float x, y, z;
 20 |   // Member order is significant: instances are brace-initialised positionally.
 21 |   // (point is a plain aggregate with no constructors.)
 22 | };
 23 | // RGB triple; raytrace() forwards the three members to Vec3f when it builds the ivory material.
 24 | struct color {
 25 |   // Channel order matters: ivoryColor below is initialised positionally.
 26 |   float r, g, b;
 27 |   // No range is enforced on the channels here.
 28 | };
 29 | 
 30 | point ivorySphereCoords = { 0, -1.5, -14 }; // only initialx/y/z are given; x, y, z start at 0 and are recomputed by loop() for every frame
 31 | color ivoryColor =  {0.4, 0.4, 0.3 }; // read by raytrace() as the ivory material's colour vector
 32 | // The glass sphere shares the ivory sphere's rest position; loop() copies initial* into x, y, z before rendering.
 33 | point glassSphereCoorsd = { 0, -1.5, -14 };
 34 | 
 35 | 
 36 | // Assemble the scene (three spheres, three lights) from the current globals and hand it to render().
 37 | void raytrace(uint16_t x, uint16_t y, uint16_t width, uint16_t height, float fov) {
 38 |   // Surface definitions; the ivory colour vector is read from ivoryColor on every call.
 39 |   Material ivorySurface (1.0, Vec4f(0.6,  0.3, 0.1, 0.0), Vec3f(ivoryColor.r, ivoryColor.g, ivoryColor.b),   50.);
 40 |   Material glassSurface (1.5, Vec4f(0.0,  0.5, 0.1, 0.8), Vec3f(0.6, 0.7, 0.8),  125.);
 41 |   Material mirrorSurface(1.0, Vec4f(0.0, 10.0, 0.8, 0.0), Vec3f(1.0, 1.0, 1.0), 1425.);
 42 |   // Sphere centres: the ivory and glass ones follow the globals, the mirror is fixed.
 43 |   const Vec3f ivoryCentre(ivorySphereCoords.x, ivorySphereCoords.y, ivorySphereCoords.z);
 44 |   const Vec3f glassCentre(glassSphereCoorsd.x, glassSphereCoorsd.y, glassSphereCoorsd.z);
 45 |   const Vec3f mirrorCentre(7, 5, -18);
 46 |   std::vector<Sphere> scene;
 47 |   scene.push_back(Sphere(ivoryCentre,  2, ivorySurface));
 48 |   scene.push_back(Sphere(glassCentre,  2, glassSurface));
 49 |   scene.push_back(Sphere(mirrorCentre, 4, mirrorSurface));
 50 |   std::vector<Light> lamps;
 51 |   lamps.push_back(Light(Vec3f(-20, 20,  20), 1.5));
 52 |   lamps.push_back(Light(Vec3f( 30, 50, -25), 1.8));
 53 |   lamps.push_back(Light(Vec3f( 30, 20,  30), 1.7));
 54 |   render(x, y, width, height, scene, lamps, fov);
 55 | }
 56 | 
 57 | 
 58 | 
 59 | // One-time bring-up: board, optional hand-over to the SD menu, PSRAM probe, output folder, renderer/encoder init, display.
 60 | void setup() {
 61 |   M5.begin();
 62 |   Wire.begin();
 63 | 
 64 |   // If button A's pin reads 0 during setup, flash the binary found on the SD card and reboot.
 65 |   const bool menuRequested = (digitalRead(BUTTON_A_PIN) == 0);
 66 |   if(menuRequested) {
 67 |     Serial.println("Will Load menu binary");
 68 |     updateFromFS(SD);
 69 |     ESP.restart();
 70 |   }
 71 | 
 72 |   // A missing PSRAM is reported but not fatal; hasPsram is consulted later by tinyJpegEncoderInit() and loop().
 73 |   if( psramInit() ) {
 74 |     hasPsram = true;
 75 |   } else {
 76 |     Serial.println("PSRAM FAIL");
 77 |   }
 78 | 
 79 |   // loop() writes its frames below /jpg on the SD card; create the folder when it is absent.
 80 |   const bool jpgDirPresent = SD.exists("/jpg");
 81 |   if(!jpgDirPresent) {
 82 |     SD.mkdir("/jpg");
 83 |   }
 84 | 
 85 |   tinyRayTracerInit();
 86 |   tinyJpegEncoderInit();
 87 | 
 88 |   M5.Lcd.begin();
 89 |   M5.Lcd.setRotation( 1 );
 90 |   M5.Lcd.setTextColor(YELLOW);
 91 |   M5.Lcd.fillScreen(BLACK);
 92 | 
 93 | }
 94 | 
 95 | bool rendered = false;
 96 | // Renders the 60-frame animation to /jpg/out<N>.jpg on the first pass, then replays the saved frames on every pass.
 97 | void loop() {
 98 | 
 99 |   uint16_t width = 128;
100 |   uint16_t height = 64;
101 | 
102 |   if( !hasPsram ) {
103 |     if(width>128)  width  = 128;
104 |     if(height>128) height = 128;
105 |   }
106 | 
107 |   uint16_t x = (M5.Lcd.width() - width) / 2;
108 |   uint16_t y = (M5.Lcd.height() - height) / 2;
109 | 
110 |   // Fixed stack buffer: the previous malloc(32) ran on every loop() pass and was never freed.
111 |   char fName[32];
112 | 
113 |   byte looplength = 60;
114 | 
115 |   if(!rendered) {
116 | 
117 |     M5.Lcd.setCursor(0,0);
118 |     M5.Lcd.print("Rendering");
119 |     unsigned long started = millis();
120 | 
121 |     glassSphereCoorsd.x = glassSphereCoorsd.initialx;
122 |     glassSphereCoorsd.z = glassSphereCoorsd.initialz;
123 |     glassSphereCoorsd.y = glassSphereCoorsd.initialy;
124 | 
125 |     for(byte framenum=0; framenum<looplength;framenum++) {
126 | 
127 |       snprintf(fName, sizeof(fName), "/jpg/out%d.jpg", framenum);
128 |       const char* jpegFileName = fName;
129 |       // NOTE(review): myfov is only logged; raytrace() below is called with a fixed 0.6 — confirm which is intended.
130 |       float myfov = 0.5 + (float)framenum / looplength;
131 |       ivorySphereCoords.x = ivorySphereCoords.initialx + (4*sin( ((float)framenum/looplength)*PI*2 ));
132 |       ivorySphereCoords.z = ivorySphereCoords.initialz + (2*cos( ((float)framenum/looplength)*PI*2 ));
133 |       ivorySphereCoords.y = ivorySphereCoords.initialy + fabs(2.75*cos( ((float)((framenum*2)%looplength)/looplength)*PI*2 ));
134 | 
135 |       //ivoryColor.r = fabs(.1*sin( ((float)framenum/looplength)*PI*2 )) + .3;
136 |       //ivoryColor.g = ivoryColor.r;
137 |       //ivoryColor.b = ivoryColor.r - 0.1;
138 | 
139 |       raytrace(x, y, width, height, 0.6);
140 | 
141 |       if ( !tje_encode_to_file(jpegFileName, width, height, 3 /*3=RGB,4=RGBA*/, rgbBuffer) ) {
142 |         Serial.println("Could not write JPEG\n");
143 |       } else {
144 |         Serial.printf("[%d / %d] Rendering saved jpeg %s with fov %f\n", ESP.getFreeHeap(), ESP.getFreePsram(), jpegFileName, myfov);
145 |         M5.Lcd.setCursor(0,10);
146 |         M5.Lcd.fillRect(0,10,M5.Lcd.width(), 20, 0);
147 | 
148 |         M5.Lcd.printf("Rendered %d out of %d", framenum+1, looplength);
149 |         M5.Lcd.setCursor(0,20);
150 |         // Average seconds per finished frame, kept fractional (integer division used to truncate it to whole seconds).
151 |         float framelen = ( (millis() - started) / 1000.0f ) / (framenum+1);
152 |         int remaining = (looplength - (framenum+1)) * framelen; // framenum+1 frames are done at this point
153 | 
154 |         M5.Lcd.printf("Estimated time remaining: %d seconds", remaining);
155 |         M5.Lcd.drawJpgFile(SD, jpegFileName, x, y, width, height, 0, 0, JPEG_DIV_NONE);
156 |       }
157 |     }
158 |     rendered = true;
159 |     unsigned long ended = (millis() - started)/1000;
160 |     Serial.printf("Rendered animation in %lu seconds\n", ended); // %lu matches unsigned long
161 |     M5.Lcd.fillRect(0,0,M5.Lcd.width(), 30, 0);
162 | 
163 |   }
164 |   // Replay: draw every saved frame once per loop() pass.
165 |   for(byte framenum=0; framenum<looplength; framenum++) {
166 |     snprintf(fName, sizeof(fName), "/jpg/out%d.jpg", framenum);
167 |     const char* jpegFileName = fName;
168 |     M5.Lcd.drawJpgFile(SD, jpegFileName, x, y, width, height, 0, 0, JPEG_DIV_NONE);
169 |   }
170 | 
171 | 
172 | }
173 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # M5Stack-Raytracer
 2 | 
 3 | An M5Stack study based on Dmitry V. Sokolov's tinyraytracer https://github.com/ssloy/tinyraytracer and Sergio Gonzalez's tinyJPEG https://github.com/serge-rgb/TinyJPEG
 4 | 
 5 | ![image](https://user-images.githubusercontent.com/1893754/51555993-42acae80-1e79-11e9-837e-f5687ee09e0a.png)
 6 | 
 7 | ![demo](https://user-images.githubusercontent.com/1893754/52477825-991d2b00-2ba3-11e9-9505-da2c40d3d725.gif)
 8 | 
 9 | Requirements: M5Stack with or without PSRAM
10 | 
11 | 


--------------------------------------------------------------------------------
/geometry.h:
--------------------------------------------------------------------------------
 1 | // modified from https://github.com/ssloy/tinyraytracer
 2 | #ifndef __GEOMETRY_H__
 3 | #define __GEOMETRY_H__
 4 | #include <cmath>
 5 | #include <vector>
 6 | #include <cassert>
 7 | #include <iostream>
 8 | // Streaming helper for Arduino Print sinks: passes arg to obj.write() and returns obj so calls can be chained.
 9 | template<class T>
10 | inline Print& operator<<(Print& obj, T arg) { obj.write(arg); return obj; }
11 | // (no ';' after the function body: the one that used to follow it was only an empty declaration)
12 | // Generic fixed-size vector: DIM value-initialised elements with assert-checked indexing.
13 | template <size_t DIM, typename T> struct vec {
14 |     vec() { for (size_t k = 0; k < DIM; ++k) { data_[k] = T(); } }
15 |     T&       operator[](const size_t i)       { assert(i < DIM); return data_[i]; }
16 |     const T& operator[](const size_t i) const { assert(i < DIM); return data_[i]; }
17 | private:
18 |     T data_[DIM];
19 | };
20 | // Short names for the concrete instantiations; sizes 2, 3 and 4 resolve to the specialisations defined below.
21 | typedef vec<2, float> Vec2f;
22 | typedef vec<3, float> Vec3f;
23 | typedef vec<3, int  > Vec3i;
24 | typedef vec<4, float> Vec4f;
25 | // Two-component specialisation exposing the elements as named members x and y.
26 | template <typename T> struct vec<2,T> {
27 |     vec() : x(T()), y(T()) {}
28 |     vec(T X, T Y) : x(X), y(Y) {}
29 |     template <class U> vec(const vec<2,U> &v); // converting ctor, declared only (no definition in view); spelled with the injected class name because `vec<2,T>(...)` is not a valid constructor name from C++20 on
30 |           T& operator[](const size_t i)       { assert(i<2); return i<=0 ? x : y; }
31 |     const T& operator[](const size_t i) const { assert(i<2); return i<=0 ? x : y; }
32 |     T x,y;
33 | };
34 | // Three-component specialisation (x, y, z) with Euclidean length helpers.
35 | template <typename T> struct vec<3,T> {
36 |     vec() : x(T()), y(T()), z(T()) {}
37 |     vec(T X, T Y, T Z) : x(X), y(Y), z(Z) {}
38 |           T& operator[](const size_t i)       { assert(i<3); return i<=0 ? x : (1==i ? y : z); }
39 |     const T& operator[](const size_t i) const { assert(i<3); return i<=0 ? x : (1==i ? y : z); }
40 |     float norm() const { return std::sqrt(x*x+y*y+z*z); } // const so it can be called on const vectors; the result is a float whatever T is
41 |     vec<3,T> & normalize(T l=1) { *this = (*this)*(l/norm()); return *this; } // rescales in place to length l; there is no guard against a zero-length vector
42 |     T x,y,z;
43 | };
44 | // Four-component specialisation with named members x, y, z and w.
45 | template <typename T> struct vec<4,T> {
46 |     vec() : x(), y(), z(), w() {}
47 |     vec(T X, T Y, T Z, T W) : x(X), y(Y), z(Z), w(W) {}
48 |     T&       operator[](const size_t i)       { assert(i<4); return 0==i ? x : 1==i ? y : 2==i ? z : w; }
49 |     const T& operator[](const size_t i) const { assert(i<4); return 0==i ? x : 1==i ? y : 2==i ? z : w; }
50 |     T x,y,z,w;
51 | };
52 | // Dot product; terms are accumulated from the last component down to the first.
53 | template<size_t DIM,typename T> T operator*(const vec<DIM,T>& lhs, const vec<DIM,T>& rhs) {
54 |     T sum = T();
55 |     for (size_t k = DIM; k > 0; --k) sum += lhs[k-1]*rhs[k-1];
56 |     return sum;
57 | }
58 | // Component-wise sum; lhs is taken by value and reused as the result.
59 | template<size_t DIM,typename T>vec<DIM,T> operator+(vec<DIM,T> lhs, const vec<DIM,T>& rhs) {
60 |     for (size_t k = 0; k < DIM; ++k) lhs[k] += rhs[k];
61 |     return lhs;
62 | }
63 | // Component-wise difference; lhs is taken by value and reused as the result.
64 | template<size_t DIM,typename T>vec<DIM,T> operator-(vec<DIM,T> lhs, const vec<DIM,T>& rhs) {
65 |     for (size_t k = 0; k < DIM; ++k) lhs[k] -= rhs[k];
66 |     return lhs;
67 | }
68 | // Scales every component of lhs by rhs and returns the result as a new vector.
69 | template<size_t DIM,typename T,typename U> vec<DIM,T> operator*(const vec<DIM,T> &lhs, const U& rhs) {
70 |     vec<DIM,T> scaled;
71 |     for (size_t k = 0; k < DIM; ++k) scaled[k] = lhs[k]*rhs;
72 |     return scaled;
73 | }
74 | // Unary minus, expressed through the vector-times-scalar operator above.
75 | template<size_t DIM,typename T> vec<DIM,T> operator-(const vec<DIM,T> &lhs) {
76 |     const T minusOne(-1); return lhs*minusOne;
77 | }
78 | // Cross product of two 3-component vectors; both operands are taken by value.
79 | template <typename T> vec<3,T> cross(vec<3,T> lhs, vec<3,T> rhs) {
80 |     return vec<3,T>(lhs.y*rhs.z - lhs.z*rhs.y, lhs.z*rhs.x - lhs.x*rhs.z, lhs.x*rhs.y - lhs.y*rhs.x);
81 | }
82 | // Writes every component followed by a single space, so the output ends with a trailing space.
83 | template <size_t DIM, typename T> std::ostream& operator<<(std::ostream& out, const vec<DIM,T>& v) {
84 |     // Components go out in index order 0..DIM-1.
85 |     for (size_t k = 0; k != DIM; ++k)
86 |         out << v[k] << " ";
87 |     return out;
88 | }
89 | #endif //__GEOMETRY_H__
90 | 


--------------------------------------------------------------------------------
/tiny_jpeg_encoder.h:
--------------------------------------------------------------------------------
   1 | /**
   2 |  * tiny_jpeg.h
   3 |  *
   4 |  * Tiny JPEG Encoder
   5 |  *  - Sergio Gonzalez
   6 |  *
   7 |  * This is a readable and simple single-header JPEG encoder.
   8 |  *
   9 |  * Features
  10 |  *  - Implements Baseline DCT JPEG compression.
  11 |  *  - No dynamic allocations.
  12 |  *
  13 |  * This library is coded in the spirit of the stb libraries and mostly follows
  14 |  * the stb guidelines.
  15 |  *
  16 |  * It is written in C99. And depends on the C standard library.
  17 |  * Works with C++11
  18 |  *
  19 |  *
  20 |  * ==== Thanks ====
  21 |  *
  22 |  *  AssociationSirius (Bug reports)
  23 |  *  Bernard van Gastel (Thread-safe defaults, BSD compilation)
  24 |  *
  25 |  *
  26 |  * ==== License ====
  27 |  *
  28 |  * This software is in the public domain. Where that dedication is not
  29 |  * recognized, you are granted a perpetual, irrevocable license to copy and
  30 |  * modify this file as you see fit.
  31 |  *
  32 |  */
  33 | 
  34 | 
  35 | 
  36 | #define TJE_IMPLEMENTATION
  37 | 
  38 | uint8_t** huffsize = NULL;   // table of 4 rows, 257 bytes each; allocated by tinyJpegEncoderInit()
  39 | uint16_t** huffcode = NULL;  // table of 4 rows, 256 uint16_t each; allocated by tinyJpegEncoderInit()
  40 | // Allocates the 4-row Huffman size/code tables, from PSRAM when hasPsram is set and from the regular heap otherwise.
  41 | void tinyJpegEncoderInit() {
  42 |   // ps_calloc and calloc take the same (count, size) arguments, so the allocator is chosen once.
  43 |   void* (*zeroedAlloc)(size_t, size_t) = calloc;
  44 |   if(hasPsram) zeroedAlloc = ps_calloc;
  45 | 
  46 |   // Row-pointer tables hold four pointers each. They used to be sized as 4*257 bytes and
  47 |   // 4*256 uint16_t, i.e. as if they stored the table contents instead of pointers to the rows.
  48 |   huffsize = (uint8_t**)zeroedAlloc(4, sizeof(uint8_t*));
  49 |   huffcode = (uint16_t**)zeroedAlloc(4, sizeof(uint16_t*));
  50 |   if(huffsize == NULL || huffcode == NULL) {
  51 |     log_e("tinyJpegEncoderInit: could not allocate the Huffman row tables");
  52 |     return; // no rows can be attached; a NULL table pointer marks the failure
  53 |   }
  54 |   for(byte i=0; i<4; i++) {
  55 |     huffsize[i] = (uint8_t*)zeroedAlloc(257, sizeof(uint8_t));
  56 |     huffcode[i] = (uint16_t*)zeroedAlloc(256, sizeof(uint16_t));
  57 |     if(huffsize[i] == NULL || huffcode[i] == NULL) {
  58 |       log_e("tinyJpegEncoderInit: could not allocate Huffman row %d", i);
  59 |     }
  60 |   }
  61 | }
  62 | 
  63 | 
  64 | #ifdef __cplusplus
  65 | extern "C"
  66 | {
  67 | #endif
  68 | 
  69 | #if defined(__GNUC__) || defined(__clang__)
  70 | #pragma GCC diagnostic push
  71 | #pragma GCC diagnostic ignored "-Wmissing-field-initializers"  // We use {0}, which will zero-out the struct.
  72 | #pragma GCC diagnostic ignored "-Wmissing-braces"
  73 | #pragma GCC diagnostic ignored "-Wpadded"
  74 | #endif
  75 | 
  76 | // ============================================================
  77 | // Public interface:
  78 | // ============================================================
  79 | 
  80 | #ifndef TJE_HEADER_GUARD
  81 | #define TJE_HEADER_GUARD
  82 | 
  83 | // - tje_encode_to_file -
  84 | //
  85 | // Usage:
  86 | //  Takes bitmap data and writes a JPEG-encoded image to disk.
  87 | //
  88 | //  PARAMETERS
  89 | //      dest_path:          filename to which we will write. e.g. "out.jpg"
  90 | //      width, height:      image size in pixels
  91 | //      num_components:     3 is RGB. 4 is RGBA. Those are the only supported values
  92 | //      src_data:           pointer to the pixel data.
  93 | //
  94 | //  RETURN:
  95 | //      0 on error. 1 on success.
  96 | 
  97 | int tje_encode_to_file(const char* dest_path,
  98 |                        const int width,
  99 |                        const int height,
 100 |                        const int num_components,
 101 |                        const unsigned char* src_data);
 102 | 
 103 | // - tje_encode_to_file_at_quality -
 104 | //
 105 | // Usage:
 106 | //  Takes bitmap data and writes a JPEG-encoded image to disk.
 107 | //
 108 | //  PARAMETERS
 109 | //      dest_path:          filename to which we will write. e.g. "out.jpg"
 110 | //      quality:            3: Highest. Compression varies wildly (between 1/3 and 1/20).
 111 | //                          2: Very good quality. About 1/2 the size of 3.
 112 | //                          1: Noticeable. About 1/6 the size of 3, or 1/3 the size of 2.
 113 | //      width, height:      image size in pixels
 114 | //      num_components:     3 is RGB. 4 is RGBA. Those are the only supported values
 115 | //      src_data:           pointer to the pixel data.
 116 | //
 117 | //  RETURN:
 118 | //      0 on error. 1 on success.
 119 | 
 120 | int tje_encode_to_file_at_quality(const char* dest_path,
 121 |                                   const int quality,
 122 |                                   const int width,
 123 |                                   const int height,
 124 |                                   const int num_components,
 125 |                                   const unsigned char* src_data);
 126 | 
 127 | // - tje_encode_with_func -
 128 | //
 129 | // Usage
 130 | //  Same as tje_encode_to_file_at_quality, but it takes a callback that knows
 131 | //  how to handle (or ignore) `context`. The callback receives an array `data`
 132 | //  of `size` bytes, which can be written directly to a file. There is no need
 133 | //  to free the data.
 134 | 
 135 | typedef void tje_write_func(void* context, void* data, int size);
 136 | 
 137 | int tje_encode_with_func(tje_write_func* func,
 138 |                          void* context,
 139 |                          const int quality,
 140 |                          const int width,
 141 |                          const int height,
 142 |                          const int num_components,
 143 |                          const unsigned char* src_data);
 144 | 
 145 | #endif // TJE_HEADER_GUARD
 146 | 
 147 | 
 148 | 
 149 | // Implementation: In exactly one of the source files of your application,
 150 | // define TJE_IMPLEMENTATION and include tiny_jpeg.h
 151 | 
 152 | // ============================================================
 153 | // Internal
 154 | // ============================================================
 155 | #ifdef TJE_IMPLEMENTATION
 156 | 
 157 | // Min/max helpers (arguments may be evaluated twice). Every argument and the whole expression are parenthesised so that operators at the call site cannot regroup them; the trailing ';' of the original definitions is kept because the call sites are outside this view.
 158 | #define tjei_min(a, b) (((a) < (b)) ? (a) : (b));
 159 | #define tjei_max(a, b) (((a) < (b)) ? (b) : (a));
 160 | 
 161 | 
 162 | #if defined(_MSC_VER)
 163 | #define TJEI_FORCE_INLINE __forceinline
 164 | // #define TJEI_FORCE_INLINE __declspec(noinline)  // For profiling
 165 | #else
 166 | #define TJEI_FORCE_INLINE static // TODO: equivalent for gcc & clang
 167 | #endif
 168 | 
 169 | // Only use zero for debugging and/or inspection.
 170 | #define TJE_USE_FAST_DCT 1
 171 | 
 172 | // C std lib
 173 | #include <assert.h>
 174 | #include <inttypes.h>
 175 | #include <math.h>   // floorf, ceilf
 176 | #include <stdio.h>  // FILE, puts
 177 | #include <string.h> // memcpy
 178 | 
 179 | 
 180 | #define TJEI_BUFFER_SIZE 1024
 181 | /*
 182 | #ifdef _WIN32
 183 | 
 184 | #include <windows.h>
 185 | #ifndef snprintf
 186 | #define snprintf sprintf_s
 187 | #endif
 188 | // Not quite the same but it works for us. If I am not mistaken, it differs
 189 | // only in the return value.
 190 | 
 191 | #endif
 192 | */
 193 | #ifndef NDEBUG
 194 | 
 195 | 
 196 | /*
 197 | #ifdef _WIN32
 198 | #define tje_log(msg) OutputDebugStringA(msg)
 199 | #elif defined(__linux__) || defined(__APPLE__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)
 200 | #define tje_log(msg) puts(msg)
 201 | 
 202 | #else
 203 | #warning "need a tje_log definition for your platform for debugging purposes (not needed if compiling with NDEBUG)"
 204 | #endif
 205 | */
 206 | #define tje_log(msg) log_e(msg)
 207 | 
 208 | #else  // NDEBUG
 209 | #define tje_log(msg)
 210 | #endif  // NDEBUG
 211 | 
 212 | // Pairs an output callback with the opaque pointer meant for it.
 213 | typedef struct
 214 | {
 215 |     void*           context;  // presumably handed back as the callback's first (context) argument — call site not in view
 216 |     tje_write_func* func;     // callback of type tje_write_func: (context, data, size)
 217 | } TJEWriteContext;
 218 | // Encoder working state: Huffman tables, quantization tables, the output callback and a small output buffer.
 219 | typedef struct
 220 | {
 221 |     // Huffman data.
 222 |     uint8_t         ehuffsize[4][257];  // same row shape as the rows tinyJpegEncoderInit() allocates for huffsize
 223 |     uint16_t        ehuffcode[4][256];  // same row shape as the rows tinyJpegEncoderInit() allocates for huffcode
 224 |     uint8_t const * ht_bits[4];
 225 |     uint8_t const * ht_vals[4];
 226 | 
 227 |     // Quantization tables.
 228 |     uint8_t         qt_luma[64];
 229 |     uint8_t         qt_chroma[64];
 230 | 
 231 |     // fwrite by default. User-defined when using tje_encode_with_func.
 232 |     TJEWriteContext write_context;
 233 | 
 234 |     // Buffered output. Big performance win when using the usual stdlib implementations.
 235 |     size_t          output_buffer_count;  // presumably the number of bytes currently held in output_buffer — confirm
 236 |     uint8_t         output_buffer[TJEI_BUFFER_SIZE];  // TJEI_BUFFER_SIZE is defined as 1024 above
 237 | } TJEState;
 238 | 
 239 | // ============================================================
 240 | // Table definitions.
 241 | //
 242 | // The spec defines tjei_default reasonably good quantization matrices and huffman
 243 | // specification tables.
 244 | //
 245 | //
 246 | // Instead of hard-coding the final huffman table, we only hard-code the table
 247 | // spec suggested by the specification, and then derive the full table from
 248 | // there.  This is only for didactic purposes but it might be useful if there
 249 | // ever is the case that we need to swap huffman tables from various sources.
 250 | // ============================================================
 251 | 
 252 | 
 253 | // K.1 - suggested luminance QT
 254 | static const uint8_t tjei_default_qt_luma_from_spec[] =  // 64 entries, written as 8 rows of 8
 255 | {
 256 |    16,11,10,16, 24, 40, 51, 61,
 257 |    12,12,14,19, 26, 58, 60, 55,
 258 |    14,13,16,24, 40, 57, 69, 56,
 259 |    14,17,22,29, 51, 87, 80, 62,
 260 |    18,22,37,56, 68,109,103, 77,
 261 |    24,35,55,64, 81,104,113, 92,
 262 |    49,64,78,87,103,121,120,101,
 263 |    72,92,95,98,112,100,103, 99,
 264 | };
 265 | 
 266 | // Unused
 267 | #if 0
 268 | static const uint8_t tjei_default_qt_chroma_from_spec[] =
 269 | {
 270 |     // K.1 - suggested chrominance QT
 271 |    17,18,24,47,99,99,99,99,
 272 |    18,21,26,66,99,99,99,99,
 273 |    24,26,56,99,99,99,99,99,
 274 |    47,66,99,99,99,99,99,99,
 275 |    99,99,99,99,99,99,99,99,
 276 |    99,99,99,99,99,99,99,99,
 277 |    99,99,99,99,99,99,99,99,
 278 |    99,99,99,99,99,99,99,99,
 279 | };
 280 | #endif
 281 | 
 282 | static const uint8_t tjei_default_qt_chroma_from_paper[] =  // 64 entries; presumably stands in for the tjei_default_qt_chroma_from_spec table disabled with #if 0 above — confirm at the use site
 283 | {
 284 |     // Example QT from JPEG paper
 285 |     16,  12, 14,  14, 18, 24,  49,  72,
 286 |     11,  10, 16,  24, 40, 51,  61,  12,
 287 |     13,  17, 22,  35, 64, 92,  14,  16,
 288 |     22,  37, 55,  78, 95, 19,  24,  29,
 289 |     56,  64, 87,  98, 26, 40,  51,  68,
 290 |     81, 103, 112, 58, 57, 87,  109, 104,
 291 |     121,100, 60,  69, 80, 103, 113, 120,
 292 |     103, 55, 56,  62, 77, 92,  101, 99,
 293 | };
 294 | 
 295 | // == Procedure to 'deflate' the huffman tree: JPEG spec, C.2
 296 | 
 297 | // Number of 16 bit values for every code length. (K.3.3.1)
 298 | static const uint8_t tjei_default_ht_luma_dc_len[16] =  // the 16 counts sum to 12, the size of tjei_default_ht_luma_dc
 299 | {
 300 |     0,1,5,1,1,1,1,1,1,0,0,0,0,0,0,0
 301 | };
 302 | // values
 303 | static const uint8_t tjei_default_ht_luma_dc[12] =
 304 | {
 305 |     0,1,2,3,4,5,6,7,8,9,10,11
 306 | };
 307 | 
 308 | // Number of 16 bit values for every code length. (K.3.3.1)
 309 | static const uint8_t tjei_default_ht_chroma_dc_len[16] =  // the 16 counts sum to 12, the size of tjei_default_ht_chroma_dc
 310 | {
 311 |     0,3,1,1,1,1,1,1,1,1,1,0,0,0,0,0
 312 | };
 313 | // values
 314 | static const uint8_t tjei_default_ht_chroma_dc[12] =  // same twelve values as tjei_default_ht_luma_dc
 315 | {
 316 |     0,1,2,3,4,5,6,7,8,9,10,11
 317 | };
 318 | 
 319 | // Same as above, but AC coefficients.
 320 | static const uint8_t tjei_default_ht_luma_ac_len[16] =  // the 16 counts sum to 162 (0x7d = 125 in the last slot)
 321 | {
 322 |     0,2,1,3,3,2,4,3,5,5,4,4,0,0,1,0x7d
 323 | };
 324 | static const uint8_t tjei_default_ht_luma_ac[] =  // 162 values: ten rows of 16 plus 2
 325 | {
 326 |     0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12, 0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07,
 327 |     0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xA1, 0x08, 0x23, 0x42, 0xB1, 0xC1, 0x15, 0x52, 0xD1, 0xF0,
 328 |     0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0A, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x25, 0x26, 0x27, 0x28,
 329 |     0x29, 0x2A, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
 330 |     0x4A, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
 331 |     0x6A, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
 332 |     0x8A, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
 333 |     0xA8, 0xA9, 0xAA, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xC2, 0xC3, 0xC4, 0xC5,
 334 |     0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xE1, 0xE2,
 335 |     0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8,
 336 |     0xF9, 0xFA
 337 | };
 338 | 
 339 | static const uint8_t tjei_default_ht_chroma_ac_len[16] =  // the 16 counts sum to 162 (0x77 = 119 in the last slot)
 340 | {
 341 |     0,2,1,2,4,4,3,4,7,5,4,4,0,1,2,0x77
 342 | };
 343 | static const uint8_t tjei_default_ht_chroma_ac[] =  // 162 values: ten rows of 16 plus 2
 344 | {
 345 |     0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21, 0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71,
 346 |     0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91, 0xA1, 0xB1, 0xC1, 0x09, 0x23, 0x33, 0x52, 0xF0,
 347 |     0x15, 0x62, 0x72, 0xD1, 0x0A, 0x16, 0x24, 0x34, 0xE1, 0x25, 0xF1, 0x17, 0x18, 0x19, 0x1A, 0x26,
 348 |     0x27, 0x28, 0x29, 0x2A, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
 349 |     0x49, 0x4A, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
 350 |     0x69, 0x6A, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
 351 |     0x88, 0x89, 0x8A, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0xA2, 0xA3, 0xA4, 0xA5,
 352 |     0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xC2, 0xC3,
 353 |     0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA,
 354 |     0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8,
 355 |     0xF9, 0xFA
 356 | };
 357 | 
 358 | 
 359 | // ============================================================
 360 | // Code
 361 | // ============================================================
 362 | 
// Zig-zag order: tjei_zig_zag[i] is the zig-zag position of the coefficient
// stored at row-major index i of an 8x8 block. The encoder uses it as
// du[tjei_zig_zag[i]] = coefficient[i].
static const uint8_t tjei_zig_zag[64] =
{
    0,   1,  5,  6, 14, 15, 27, 28,
    2,   4,  7, 13, 16, 26, 29, 42,
    3,   8, 12, 17, 25, 30, 41, 43,
    9,  11, 18, 24, 31, 40, 44, 53,
    10, 19, 23, 32, 39, 45, 52, 54,
    20, 22, 33, 38, 46, 51, 55, 60,
    21, 34, 37, 47, 50, 56, 59, 61,
    35, 36, 48, 49, 57, 58, 62, 63,
};
 375 | 
 376 | // Memory order as big endian. 0xhilo -> 0xlohi which looks as 0xhilo in memory.
 377 | static uint16_t tjei_be_word(const uint16_t le_word)
 378 | {
 379 |     uint16_t lo = (le_word & 0x00ff);
 380 |     uint16_t hi = ((le_word & 0xff00) >> 8);
 381 |     return (uint16_t)((lo << 8) | hi);
 382 | }
 383 | 
// ============================================================
// The following structs exist only for code clarity, debuggability, and
// readability. They are used when writing to disk, but it is useful to have
// 1-packed-structs to document how the format works, and to inspect memory
// while developing.
// ============================================================

// Identifier copied into TJEJPEGHeader::jfif_id (5 bytes, including the NUL).
static const uint8_t tjeik_jfif_id[] = "JFIF";
// Text of the comment segment; it is written without its terminating NUL.
static const uint8_t tjeik_com_str[] = "Created by Tiny JPEG Encoder";

// TODO: Get rid of packed structs!
// Packing is set to 1 so that sizeof() and the in-memory layout of these
// structs match the bytes written to the output, with no padding.
#pragma pack(push)
#pragma pack(1)
// Start-of-image marker followed by the JFIF APP0 segment.
// All 16-bit fields are stored byte-swapped with tjei_be_word().
typedef struct
{
    uint16_t SOI;
    // JFIF header.
    uint16_t APP0;
    uint16_t jfif_len;      // Size of this struct minus the two 2-byte markers.
    uint8_t  jfif_id[5];
    uint16_t version;
    uint8_t  units;
    uint16_t x_density;
    uint16_t y_density;
    uint8_t  x_thumb;
    uint8_t  y_thumb;
} TJEJPEGHeader;

// Comment segment: marker, length, then the comment text (no NUL terminator).
typedef struct
{
    uint16_t com;
    uint16_t com_len;
    char     com_str[sizeof(tjeik_com_str) - 1];
} TJEJPEGComment;

// Helper struct for TJEFrameHeader (below).
typedef struct
{
    uint8_t  component_id;
    uint8_t  sampling_factors;    // most significant 4 bits: horizontal. 4 LSB: vertical (A.1.1)
    uint8_t  qt;                  // Quantization table selector.
} TJEComponentSpec;

// Start-of-frame segment describing the image size and its three components.
typedef struct
{
    uint16_t         SOF;
    uint16_t         len;                   // 8 + 3 * frame.num_components
    uint8_t          precision;             // Sample precision (bits per sample).
    uint16_t         height;
    uint16_t         width;
    uint8_t          num_components;        // For this implementation, will be equal to 3.
    TJEComponentSpec component_spec[3];
} TJEFrameHeader;

// Helper struct for TJEScanHeader (below).
typedef struct
{
    uint8_t component_id;                 // Just as with TJEComponentSpec
    uint8_t dc_ac;                        // (dc|ac)
} TJEFrameComponentSpec;

// Start-of-scan segment that precedes the entropy-coded data.
typedef struct
{
    uint16_t              SOS;
    uint16_t              len;
    uint8_t               num_components;  // 3.
    TJEFrameComponentSpec component_spec[3];
    uint8_t               first;  // 0
    uint8_t               last;  // 63
    uint8_t               ah_al;  // 0
} TJEScanHeader;
#pragma pack(pop)
 455 | 
 456 | 
 457 | static void tjei_write(TJEState* state, const void* data, size_t num_bytes, size_t num_elements)
 458 | {
 459 |     size_t to_write = num_bytes * num_elements;
 460 | 
 461 |     // Cap to the buffer available size and copy memory.
 462 |     size_t capped_count = tjei_min(to_write, TJEI_BUFFER_SIZE - 1 - state->output_buffer_count);
 463 | 
 464 |     memcpy(state->output_buffer + state->output_buffer_count, data, capped_count);
 465 |     state->output_buffer_count += capped_count;
 466 | 
 467 |     assert (state->output_buffer_count <= TJEI_BUFFER_SIZE - 1);
 468 | 
 469 |     // Flush the buffer.
 470 |     if ( state->output_buffer_count == TJEI_BUFFER_SIZE - 1 ) {
 471 |         state->write_context.func(state->write_context.context, state->output_buffer, (int)state->output_buffer_count);
 472 |         state->output_buffer_count = 0;
 473 |     }
 474 | 
 475 |     // Recursively calling ourselves with the rest of the buffer.
 476 |     if (capped_count < to_write) {
 477 |         tjei_write(state, (uint8_t*)data+capped_count, to_write - capped_count, 1);
 478 |     }
 479 | }
 480 | 
 481 | static void tjei_write_DQT(TJEState* state, const uint8_t* matrix, uint8_t id)
 482 | {
 483 |     uint16_t DQT = tjei_be_word(0xffdb);
 484 |     tjei_write(state, &DQT, sizeof(uint16_t), 1);
 485 |     uint16_t len = tjei_be_word(0x0043); // 2(len) + 1(id) + 64(matrix) = 67 = 0x43
 486 |     tjei_write(state, &len, sizeof(uint16_t), 1);
 487 |     assert(id < 4);
 488 |     uint8_t precision_and_id = id;  // 0x0000 8 bits | 0x00id
 489 |     tjei_write(state, &precision_and_id, sizeof(uint8_t), 1);
 490 |     // Write matrix
 491 |     tjei_write(state, matrix, 64*sizeof(uint8_t), 1);
 492 | }
 493 | 
// Huffman table class; tjei_write_DHT stores it in the high nibble of the
// byte that follows the segment length.
typedef enum
{
    TJEI_DC = 0,
    TJEI_AC = 1
} TJEHuffmanTableClass;
 499 | 
 500 | static void tjei_write_DHT(TJEState* state,
 501 |                            uint8_t const * matrix_len,
 502 |                            uint8_t const * matrix_val,
 503 |                            TJEHuffmanTableClass ht_class,
 504 |                            uint8_t id)
 505 | {
 506 |     int num_values = 0;
 507 |     for ( int i = 0; i < 16; ++i ) {
 508 |         num_values += matrix_len[i];
 509 |     }
 510 |     assert(num_values <= 0xffff);
 511 | 
 512 |     uint16_t DHT = tjei_be_word(0xffc4);
 513 |     // 2(len) + 1(Tc|th) + 16 (num lengths) + ?? (num values)
 514 |     uint16_t len = tjei_be_word(2 + 1 + 16 + (uint16_t)num_values);
 515 |     assert(id < 4);
 516 |     uint8_t tc_th = (uint8_t)((((uint8_t)ht_class) << 4) | id);
 517 | 
 518 |     tjei_write(state, &DHT, sizeof(uint16_t), 1);
 519 |     tjei_write(state, &len, sizeof(uint16_t), 1);
 520 |     tjei_write(state, &tc_th, sizeof(uint8_t), 1);
 521 |     tjei_write(state, matrix_len, sizeof(uint8_t), 16);
 522 |     tjei_write(state, matrix_val, sizeof(uint8_t), (size_t)num_values);
 523 | }
 524 | // ============================================================
 525 | //  Huffman deflation code.
 526 | // ============================================================
 527 | 
 528 | // Returns all code sizes from the BITS specification (JPEG C.3)
 529 | static uint8_t* tjei_huff_get_code_lengths(uint8_t huffsize[/*256*/], uint8_t const * bits)
 530 | {
 531 |     int k = 0;
 532 |     for ( int i = 0; i < 16; ++i ) {
 533 |         for ( int j = 0; j < bits[i]; ++j ) {
 534 |             huffsize[k++] = (uint8_t)(i + 1);
 535 |         }
 536 |         huffsize[k] = 0;
 537 |     }
 538 |     return huffsize;
 539 | }
 540 | 
 541 | // Fills out the prefixes for each code.
 542 | static uint16_t* tjei_huff_get_codes(uint16_t codes[], uint8_t* huffsize, int64_t count)
 543 | {
 544 |     uint16_t code = 0;
 545 |     int k = 0;
 546 |     uint8_t sz = huffsize[0];
 547 |     for(;;) {
 548 |         do {
 549 |             assert(k < count);
 550 |             codes[k++] = code++;
 551 |         } while (huffsize[k] == sz);
 552 |         if (huffsize[k] == 0) {
 553 |             return codes;
 554 |         }
 555 |         do {
 556 |             code = (uint16_t)(code << 1);
 557 |             ++sz;
 558 |         } while( huffsize[k] != sz );
 559 |     }
 560 | }
 561 | 
 562 | static void tjei_huff_get_extended(uint8_t* out_ehuffsize,
 563 |                                    uint16_t* out_ehuffcode,
 564 |                                    uint8_t const * huffval,
 565 |                                    uint8_t* huffsize,
 566 |                                    uint16_t* huffcode, int64_t count)
 567 | {
 568 |     int k = 0;
 569 |     do {
 570 |         uint8_t val = huffval[k];
 571 |         out_ehuffcode[val] = huffcode[k];
 572 |         out_ehuffsize[val] = huffsize[k];
 573 |         k++;
 574 |     } while ( k < count );
 575 | }
 576 | // ============================================================
 577 | 
 578 | // Returns:
 579 | //  out[1] : number of bits
 580 | //  out[0] : bits
 581 | TJEI_FORCE_INLINE void tjei_calculate_variable_length_int(int value, uint16_t out[2])
 582 | {
 583 |     int abs_val = value;
 584 |     if ( value < 0 ) {
 585 |         abs_val = -abs_val;
 586 |         --value;
 587 |     }
 588 |     out[1] = 1;
 589 |     while( abs_val >>= 1 ) {
 590 |         ++out[1];
 591 |     }
 592 |     out[0] = (uint16_t)(value & ((1 << out[1]) - 1));
 593 | }
 594 | 
 595 | // Write bits to file.
 596 | TJEI_FORCE_INLINE void tjei_write_bits(TJEState* state,
 597 |                                        uint32_t* bitbuffer, uint32_t* location,
 598 |                                        uint16_t num_bits, uint16_t bits)
 599 | {
 600 |     //   v-- location
 601 |     //  [                     ]   <-- bit buffer
 602 |     // 32                     0
 603 |     //
 604 |     // This call pushes to the bitbuffer and saves the location. Data is pushed
 605 |     // from most significant to less significant.
 606 |     // When we can write a full byte, we write a byte and shift.
 607 | 
 608 |     // Push the stack.
 609 |     uint32_t nloc = *location + num_bits;
 610 |     *bitbuffer |= (uint32_t)(bits << (32 - nloc));
 611 |     *location = nloc;
 612 |     while ( *location >= 8 ) {
 613 |         // Grab the most significant byte.
 614 |         uint8_t c = (uint8_t)((*bitbuffer) >> 24);
 615 |         // Write it to file.
 616 |         tjei_write(state, &c, 1, 1);
 617 |         if ( c == 0xff )  {
 618 |             // Special case: tell JPEG this is not a marker.
 619 |             char z = 0;
 620 |             tjei_write(state, &z, 1, 1);
 621 |         }
 622 |         // Pop the stack.
 623 |         *bitbuffer <<= 8;
 624 |         *location -= 8;
 625 |     }
 626 | }
 627 | 
 628 | // DCT implementation by Thomas G. Lane.
 629 | // Obtained through NVIDIA
 630 | //  http://developer.download.nvidia.com/SDK/9.5/Samples/vidimaging_samples.html#gpgpu_dct
 631 | //
 632 | // QUOTE:
 633 | //  This implementation is based on Arai, Agui, and Nakajima's algorithm for
 634 | //  scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
 635 | //  Japanese, but the algorithm is described in the Pennebaker & Mitchell
 636 | //  JPEG textbook (see REFERENCES section in file README).  The following code
 637 | //  is based directly on figure 4-8 in P&M.
 638 | //
 639 | static void tjei_fdct (float * data)
 640 | {
 641 |     float tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
 642 |     float tmp10, tmp11, tmp12, tmp13;
 643 |     float z1, z2, z3, z4, z5, z11, z13;
 644 |     float *dataptr;
 645 |     int ctr;
 646 | 
 647 |     /* Pass 1: process rows. */
 648 | 
 649 |     dataptr = data;
 650 |     for ( ctr = 7; ctr >= 0; ctr-- ) {
 651 |         tmp0 = dataptr[0] + dataptr[7];
 652 |         tmp7 = dataptr[0] - dataptr[7];
 653 |         tmp1 = dataptr[1] + dataptr[6];
 654 |         tmp6 = dataptr[1] - dataptr[6];
 655 |         tmp2 = dataptr[2] + dataptr[5];
 656 |         tmp5 = dataptr[2] - dataptr[5];
 657 |         tmp3 = dataptr[3] + dataptr[4];
 658 |         tmp4 = dataptr[3] - dataptr[4];
 659 | 
 660 |         /* Even part */
 661 | 
 662 |         tmp10 = tmp0 + tmp3;    /* phase 2 */
 663 |         tmp13 = tmp0 - tmp3;
 664 |         tmp11 = tmp1 + tmp2;
 665 |         tmp12 = tmp1 - tmp2;
 666 | 
 667 |         dataptr[0] = tmp10 + tmp11; /* phase 3 */
 668 |         dataptr[4] = tmp10 - tmp11;
 669 | 
 670 |         z1 = (tmp12 + tmp13) * ((float) 0.707106781); /* c4 */
 671 |         dataptr[2] = tmp13 + z1;    /* phase 5 */
 672 |         dataptr[6] = tmp13 - z1;
 673 | 
 674 |         /* Odd part */
 675 | 
 676 |         tmp10 = tmp4 + tmp5;    /* phase 2 */
 677 |         tmp11 = tmp5 + tmp6;
 678 |         tmp12 = tmp6 + tmp7;
 679 | 
 680 |         /* The rotator is modified from fig 4-8 to avoid extra negations. */
 681 |         z5 = (tmp10 - tmp12) * ((float) 0.382683433); /* c6 */
 682 |         z2 = ((float) 0.541196100) * tmp10 + z5; /* c2-c6 */
 683 |         z4 = ((float) 1.306562965) * tmp12 + z5; /* c2+c6 */
 684 |         z3 = tmp11 * ((float) 0.707106781); /* c4 */
 685 | 
 686 |         z11 = tmp7 + z3;        /* phase 5 */
 687 |         z13 = tmp7 - z3;
 688 | 
 689 |         dataptr[5] = z13 + z2;  /* phase 6 */
 690 |         dataptr[3] = z13 - z2;
 691 |         dataptr[1] = z11 + z4;
 692 |         dataptr[7] = z11 - z4;
 693 | 
 694 |         dataptr += 8;     /* advance pointer to next row */
 695 |     }
 696 | 
 697 |     /* Pass 2: process columns. */
 698 | 
 699 |     dataptr = data;
 700 |     for ( ctr = 8-1; ctr >= 0; ctr-- ) {
 701 |         tmp0 = dataptr[8*0] + dataptr[8*7];
 702 |         tmp7 = dataptr[8*0] - dataptr[8*7];
 703 |         tmp1 = dataptr[8*1] + dataptr[8*6];
 704 |         tmp6 = dataptr[8*1] - dataptr[8*6];
 705 |         tmp2 = dataptr[8*2] + dataptr[8*5];
 706 |         tmp5 = dataptr[8*2] - dataptr[8*5];
 707 |         tmp3 = dataptr[8*3] + dataptr[8*4];
 708 |         tmp4 = dataptr[8*3] - dataptr[8*4];
 709 | 
 710 |         /* Even part */
 711 | 
 712 |         tmp10 = tmp0 + tmp3;    /* phase 2 */
 713 |         tmp13 = tmp0 - tmp3;
 714 |         tmp11 = tmp1 + tmp2;
 715 |         tmp12 = tmp1 - tmp2;
 716 | 
 717 |         dataptr[8*0] = tmp10 + tmp11; /* phase 3 */
 718 |         dataptr[8*4] = tmp10 - tmp11;
 719 | 
 720 |         z1 = (tmp12 + tmp13) * ((float) 0.707106781); /* c4 */
 721 |         dataptr[8*2] = tmp13 + z1; /* phase 5 */
 722 |         dataptr[8*6] = tmp13 - z1;
 723 | 
 724 |         /* Odd part */
 725 | 
 726 |         tmp10 = tmp4 + tmp5;    /* phase 2 */
 727 |         tmp11 = tmp5 + tmp6;
 728 |         tmp12 = tmp6 + tmp7;
 729 | 
 730 |         /* The rotator is modified from fig 4-8 to avoid extra negations. */
 731 |         z5 = (tmp10 - tmp12) * ((float) 0.382683433); /* c6 */
 732 |         z2 = ((float) 0.541196100) * tmp10 + z5; /* c2-c6 */
 733 |         z4 = ((float) 1.306562965) * tmp12 + z5; /* c2+c6 */
 734 |         z3 = tmp11 * ((float) 0.707106781); /* c4 */
 735 | 
 736 |         z11 = tmp7 + z3;        /* phase 5 */
 737 |         z13 = tmp7 - z3;
 738 | 
 739 |         dataptr[8*5] = z13 + z2; /* phase 6 */
 740 |         dataptr[8*3] = z13 - z2;
 741 |         dataptr[8*1] = z11 + z4;
 742 |         dataptr[8*7] = z11 - z4;
 743 | 
 744 |         dataptr++;          /* advance pointer to next column */
 745 |     }
 746 | }
 747 | #if !TJE_USE_FAST_DCT
 748 | static float slow_fdct(int u, int v, float* data)
 749 | {
 750 | #define kPI 3.14159265f
 751 |     float res = 0.0f;
 752 |     float cu = (u == 0) ? 0.70710678118654f : 1;
 753 |     float cv = (v == 0) ? 0.70710678118654f : 1;
 754 |     for ( int y = 0; y < 8; ++y ) {
 755 |         for ( int x = 0; x < 8; ++x ) {
 756 |             res += (data[y * 8 + x]) *
 757 |                     cosf(((2.0f * x + 1.0f) * u * kPI) / 16.0f) *
 758 |                     cosf(((2.0f * y + 1.0f) * v * kPI) / 16.0f);
 759 |         }
 760 |     }
 761 |     res *= 0.25f * cu * cv;
 762 |     return res;
 763 | #undef kPI
 764 | }
 765 | #endif
 766 | 
 767 | #define ABS(x) ((x) < 0 ? -(x) : (x))
 768 | 
 769 | static void tjei_encode_and_write_MCU(TJEState* state,
 770 |                                       float* mcu,
 771 | #if TJE_USE_FAST_DCT
 772 |                                       float* qt,  // Pre-processed quantization matrix.
 773 | #else
 774 |                                       uint8_t* qt,
 775 | #endif
 776 |                                       uint8_t* huff_dc_len, uint16_t* huff_dc_code, // Huffman tables
 777 |                                       uint8_t* huff_ac_len, uint16_t* huff_ac_code,
 778 |                                       int* pred,  // Previous DC coefficient
 779 |                                       uint32_t* bitbuffer,  // Bitstack.
 780 |                                       uint32_t* location)
 781 | {
 782 |     int du[64];  // Data unit in zig-zag order
 783 | 
 784 |     float dct_mcu[64];
 785 |     memcpy(dct_mcu, mcu, 64 * sizeof(float));
 786 | 
 787 | #if TJE_USE_FAST_DCT
 788 |     tjei_fdct(dct_mcu);
 789 |     for ( int i = 0; i < 64; ++i ) {
 790 |         float fval = dct_mcu[i];
 791 |         fval *= qt[i];
 792 | #if 0
 793 |         fval = (fval > 0) ? floorf(fval + 0.5f) : ceilf(fval - 0.5f);
 794 | #else
 795 |         fval = floorf(fval + 1024 + 0.5f);
 796 |         fval -= 1024;
 797 | #endif
 798 |         int val = (int)fval;
 799 |         du[tjei_zig_zag[i]] = val;
 800 |     }
 801 | #else
 802 |     for ( int v = 0; v < 8; ++v ) {
 803 |         for ( int u = 0; u < 8; ++u ) {
 804 |             dct_mcu[v * 8 + u] = slow_fdct(u, v, mcu);
 805 |         }
 806 |     }
 807 |     for ( int i = 0; i < 64; ++i ) {
 808 |         float fval = dct_mcu[i] / (qt[i]);
 809 |         int val = (int)((fval > 0) ? floorf(fval + 0.5f) : ceilf(fval - 0.5f));
 810 |         du[tjei_zig_zag[i]] = val;
 811 |     }
 812 | #endif
 813 | 
 814 |     uint16_t vli[2];
 815 | 
 816 |     // Encode DC coefficient.
 817 |     int diff = du[0] - *pred;
 818 |     *pred = du[0];
 819 |     if ( diff != 0 ) {
 820 |         tjei_calculate_variable_length_int(diff, vli);
 821 |         // Write number of bits with Huffman coding
 822 |         tjei_write_bits(state, bitbuffer, location, huff_dc_len[vli[1]], huff_dc_code[vli[1]]);
 823 |         // Write the bits.
 824 |         tjei_write_bits(state, bitbuffer, location, vli[1], vli[0]);
 825 |     } else {
 826 |         tjei_write_bits(state, bitbuffer, location, huff_dc_len[0], huff_dc_code[0]);
 827 |     }
 828 | 
 829 |     // ==== Encode AC coefficients ====
 830 | 
 831 |     int last_non_zero_i = 0;
 832 |     // Find the last non-zero element.
 833 |     for ( int i = 63; i > 0; --i ) {
 834 |         if (du[i] != 0) {
 835 |             last_non_zero_i = i;
 836 |             break;
 837 |         }
 838 |     }
 839 | 
 840 |     for ( int i = 1; i <= last_non_zero_i; ++i ) {
 841 |         // If zero, increase count. If >=15, encode (FF,00)
 842 |         int zero_count = 0;
 843 |         while ( du[i] == 0 ) {
 844 |             ++zero_count;
 845 |             ++i;
 846 |             if (zero_count == 16) {
 847 |                 // encode (ff,00) == 0xf0
 848 |                 tjei_write_bits(state, bitbuffer, location, huff_ac_len[0xf0], huff_ac_code[0xf0]);
 849 |                 zero_count = 0;
 850 |             }
 851 |         }
 852 |         tjei_calculate_variable_length_int(du[i], vli);
 853 | 
 854 |         assert(zero_count < 0x10);
 855 |         assert(vli[1] <= 10);
 856 | 
 857 |         uint16_t sym1 = (uint16_t)((uint16_t)zero_count << 4) | vli[1];
 858 | 
 859 |         assert(huff_ac_len[sym1] != 0);
 860 | 
 861 |         // Write symbol 1  --- (RUNLENGTH, SIZE)
 862 |         tjei_write_bits(state, bitbuffer, location, huff_ac_len[sym1], huff_ac_code[sym1]);
 863 |         // Write symbol 2  --- (AMPLITUDE)
 864 |         tjei_write_bits(state, bitbuffer, location, vli[1], vli[0]);
 865 |     }
 866 | 
 867 |     if (last_non_zero_i != 63) {
 868 |         // write EOB HUFF(00,00)
 869 |         tjei_write_bits(state, bitbuffer, location, huff_ac_len[0], huff_ac_code[0]);
 870 |     }
 871 |     return;
 872 | }
 873 | 
// Indices into the per-table arrays of TJEState (ht_bits, ht_vals,
// ehuffsize, ehuffcode).
enum {
    TJEI_LUMA_DC,
    TJEI_LUMA_AC,
    TJEI_CHROMA_DC,
    TJEI_CHROMA_AC,
};

#if TJE_USE_FAST_DCT
// Quantization tables pre-processed for the fast DCT: each entry is the
// reciprocal of the scaled divisor, filled in by tjei_encode_main.
struct TJEProcessedQT
{
    float chroma[64];
    float luma[64];
};
#endif
 888 | 
 889 | // Set up huffman tables in state.
 890 | static void tjei_huff_expand(TJEState* state)
 891 | {
 892 |     assert(state);
 893 | 
 894 |     state->ht_bits[TJEI_LUMA_DC]   = tjei_default_ht_luma_dc_len;
 895 |     state->ht_bits[TJEI_LUMA_AC]   = tjei_default_ht_luma_ac_len;
 896 |     state->ht_bits[TJEI_CHROMA_DC] = tjei_default_ht_chroma_dc_len;
 897 |     state->ht_bits[TJEI_CHROMA_AC] = tjei_default_ht_chroma_ac_len;
 898 | 
 899 |     state->ht_vals[TJEI_LUMA_DC]   = tjei_default_ht_luma_dc;
 900 |     state->ht_vals[TJEI_LUMA_AC]   = tjei_default_ht_luma_ac;
 901 |     state->ht_vals[TJEI_CHROMA_DC] = tjei_default_ht_chroma_dc;
 902 |     state->ht_vals[TJEI_CHROMA_AC] = tjei_default_ht_chroma_ac;
 903 | 
 904 |     // How many codes in total for each of LUMA_(DC|AC) and CHROMA_(DC|AC)
 905 |     int32_t spec_tables_len[4] = { 0 };
 906 | 
 907 |     for ( int i = 0; i < 4; ++i ) {
 908 |         for ( int k = 0; k < 16; ++k ) {
 909 |             spec_tables_len[i] += state->ht_bits[i][k];
 910 |         }
 911 |     }
 912 | 
 913 |     // Fill out the extended tables..
 914 |     for(byte i=0; i<4; i++) {
 915 |       *huffsize[i] = {'\0'};
 916 |     }
 917 |     for(byte i=0; i<4; i++) {
 918 |       *huffcode[i] = {'\0'};
 919 |     }
 920 | 
 921 |     for ( int i = 0; i < 4; ++i ) {
 922 |         assert (256 >= spec_tables_len[i]);
 923 |         tjei_huff_get_code_lengths(huffsize[i], state->ht_bits[i]);
 924 |         tjei_huff_get_codes(huffcode[i], huffsize[i], spec_tables_len[i]);
 925 |     }
 926 |     for ( int i = 0; i < 4; ++i ) {
 927 |         int64_t count = spec_tables_len[i];
 928 |         tjei_huff_get_extended(state->ehuffsize[i],
 929 |                                state->ehuffcode[i],
 930 |                                state->ht_vals[i],
 931 |                                &huffsize[i][0],
 932 |                                &huffcode[i][0], count);
 933 |     }
 934 | }
 935 | 
 936 | static int tjei_encode_main(TJEState* state,
 937 |                             const unsigned char* src_data,
 938 |                             const int width,
 939 |                             const int height,
 940 |                             const int src_num_components)
 941 | {
 942 |     if (src_num_components != 3 && src_num_components != 4) {
 943 |         return 0;
 944 |     }
 945 | 
 946 |     if (width > 0xffff || height > 0xffff) {
 947 |         return 0;
 948 |     }
 949 | 
 950 | #if TJE_USE_FAST_DCT
 951 |     struct TJEProcessedQT pqt;
 952 |     // Again, taken from classic japanese implementation.
 953 |     //
 954 |     /* For float AA&N IDCT method, divisors are equal to quantization
 955 |      * coefficients scaled by scalefactor[row]*scalefactor[col], where
 956 |      *   scalefactor[0] = 1
 957 |      *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
 958 |      * We apply a further scale factor of 8.
 959 |      * What's actually stored is 1/divisor so that the inner loop can
 960 |      * use a multiplication rather than a division.
 961 |      */
 962 |     static const float aan_scales[] = {
 963 |         1.0f, 1.387039845f, 1.306562965f, 1.175875602f,
 964 |         1.0f, 0.785694958f, 0.541196100f, 0.275899379f
 965 |     };
 966 | 
 967 |     // build (de)quantization tables
 968 |     for(int y=0; y<8; y++) {
 969 |         for(int x=0; x<8; x++) {
 970 |             int i = y*8 + x;
 971 |             pqt.luma[y*8+x] = 1.0f / (8 * aan_scales[x] * aan_scales[y] * state->qt_luma[tjei_zig_zag[i]]);
 972 |             pqt.chroma[y*8+x] = 1.0f / (8 * aan_scales[x] * aan_scales[y] * state->qt_chroma[tjei_zig_zag[i]]);
 973 |         }
 974 |     }
 975 | #endif
 976 | 
 977 |     { // Write header
 978 |         TJEJPEGHeader header;
 979 |         // JFIF header.
 980 |         header.SOI = tjei_be_word(0xffd8);  // Sequential DCT
 981 |         header.APP0 = tjei_be_word(0xffe0);
 982 | 
 983 |         uint16_t jfif_len = sizeof(TJEJPEGHeader) - 4 /*SOI & APP0 markers*/;
 984 |         header.jfif_len = tjei_be_word(jfif_len);
 985 |         memcpy(header.jfif_id, (void*)tjeik_jfif_id, 5);
 986 |         header.version = tjei_be_word(0x0102);
 987 |         header.units = 0x01;  // Dots-per-inch
 988 |         header.x_density = tjei_be_word(0x0060);  // 96 DPI
 989 |         header.y_density = tjei_be_word(0x0060);  // 96 DPI
 990 |         header.x_thumb = 0;
 991 |         header.y_thumb = 0;
 992 |         tjei_write(state, &header, sizeof(TJEJPEGHeader), 1);
 993 |     }
 994 |     {  // Write comment
 995 |         TJEJPEGComment com;
 996 |         uint16_t com_len = 2 + sizeof(tjeik_com_str) - 1;
 997 |         // Comment
 998 |         com.com = tjei_be_word(0xfffe);
 999 |         com.com_len = tjei_be_word(com_len);
1000 |         memcpy(com.com_str, (void*)tjeik_com_str, sizeof(tjeik_com_str)-1);
1001 |         tjei_write(state, &com, sizeof(TJEJPEGComment), 1);
1002 |     }
1003 | 
1004 |     // Write quantization tables.
1005 |     tjei_write_DQT(state, state->qt_luma, 0x00);
1006 |     tjei_write_DQT(state, state->qt_chroma, 0x01);
1007 | 
1008 |     {  // Write the frame marker.
1009 |         TJEFrameHeader header;
1010 |         header.SOF = tjei_be_word(0xffc0);
1011 |         header.len = tjei_be_word(8 + 3 * 3);
1012 |         header.precision = 8;
1013 |         assert(width <= 0xffff);
1014 |         assert(height <= 0xffff);
1015 |         header.width = tjei_be_word((uint16_t)width);
1016 |         header.height = tjei_be_word((uint16_t)height);
1017 |         header.num_components = 3;
1018 |         uint8_t tables[3] = {
1019 |             0,  // Luma component gets luma table (see tjei_write_DQT call above.)
1020 |             1,  // Chroma component gets chroma table
1021 |             1,  // Chroma component gets chroma table
1022 |         };
1023 |         for (int i = 0; i < 3; ++i) {
1024 |             TJEComponentSpec spec;
1025 |             spec.component_id = (uint8_t)(i + 1);  // No particular reason. Just 1, 2, 3.
1026 |             spec.sampling_factors = (uint8_t)0x11;
1027 |             spec.qt = tables[i];
1028 | 
1029 |             header.component_spec[i] = spec;
1030 |         }
1031 |         // Write to file.
1032 |         tjei_write(state, &header, sizeof(TJEFrameHeader), 1);
1033 |     }
1034 | 
1035 |     tjei_write_DHT(state, state->ht_bits[TJEI_LUMA_DC],   state->ht_vals[TJEI_LUMA_DC], TJEI_DC, 0);
1036 |     tjei_write_DHT(state, state->ht_bits[TJEI_LUMA_AC],   state->ht_vals[TJEI_LUMA_AC], TJEI_AC, 0);
1037 |     tjei_write_DHT(state, state->ht_bits[TJEI_CHROMA_DC], state->ht_vals[TJEI_CHROMA_DC], TJEI_DC, 1);
1038 |     tjei_write_DHT(state, state->ht_bits[TJEI_CHROMA_AC], state->ht_vals[TJEI_CHROMA_AC], TJEI_AC, 1);
1039 | 
1040 |     // Write start of scan
1041 |     {
1042 |         TJEScanHeader header;
1043 |         header.SOS = tjei_be_word(0xffda);
1044 |         header.len = tjei_be_word((uint16_t)(6 + (sizeof(TJEFrameComponentSpec) * 3)));
1045 |         header.num_components = 3;
1046 | 
1047 |         uint8_t tables[3] = {
1048 |             0x00,
1049 |             0x11,
1050 |             0x11,
1051 |         };
1052 |         for (int i = 0; i < 3; ++i) {
1053 |             TJEFrameComponentSpec cs;
1054 |             // Must be equal to component_id from frame header above.
1055 |             cs.component_id = (uint8_t)(i + 1);
1056 |             cs.dc_ac = (uint8_t)tables[i];
1057 | 
1058 |             header.component_spec[i] = cs;
1059 |         }
1060 |         header.first = 0;
1061 |         header.last  = 63;
1062 |         header.ah_al = 0;
1063 |         tjei_write(state, &header, sizeof(TJEScanHeader), 1);
1064 | 
1065 |     }
1066 |     // Write compressed data.
1067 | 
1068 |     float du_y[64];
1069 |     float du_b[64];
1070 |     float du_r[64];
1071 | 
1072 |     // Set diff to 0.
1073 |     int pred_y = 0;
1074 |     int pred_b = 0;
1075 |     int pred_r = 0;
1076 | 
1077 |     // Bit stack
1078 |     uint32_t bitbuffer = 0;
1079 |     uint32_t location = 0;
1080 | 
1081 | 
1082 |     for ( int y = 0; y < height; y += 8 ) {
1083 |         for ( int x = 0; x < width; x += 8 ) {
1084 |             // Block loop: ====
1085 |             for ( int off_y = 0; off_y < 8; ++off_y ) {
1086 |                 for ( int off_x = 0; off_x < 8; ++off_x ) {
1087 |                     int block_index = (off_y * 8 + off_x);
1088 | 
1089 |                     int src_index = (((y + off_y) * width) + (x + off_x)) * src_num_components;
1090 | 
1091 |                     int col = x + off_x;
1092 |                     int row = y + off_y;
1093 | 
1094 |                     if(row >= height) {
1095 |                         src_index -= (width * (row - height + 1)) * src_num_components;
1096 |                     }
1097 |                     if(col >= width) {
1098 |                         src_index -= (col - width + 1) * src_num_components;
1099 |                     }
1100 |                     assert(src_index < width * height * src_num_components);
1101 | 
1102 |                     uint8_t r = src_data[src_index + 0];
1103 |                     uint8_t g = src_data[src_index + 1];
1104 |                     uint8_t b = src_data[src_index + 2];
1105 | 
1106 |                     float luma = 0.299f   * r + 0.587f    * g + 0.114f    * b - 128;
1107 |                     float cb   = -0.1687f * r - 0.3313f   * g + 0.5f      * b;
1108 |                     float cr   = 0.5f     * r - 0.4187f   * g - 0.0813f   * b;
1109 | 
1110 |                     du_y[block_index] = luma;
1111 |                     du_b[block_index] = cb;
1112 |                     du_r[block_index] = cr;
1113 |                 }
1114 |             }
1115 | 
1116 |             tjei_encode_and_write_MCU(state, du_y,
1117 | #if TJE_USE_FAST_DCT
1118 |                                      pqt.luma,
1119 | #else
1120 |                                      state->qt_luma,
1121 | #endif
1122 |                                      state->ehuffsize[TJEI_LUMA_DC], state->ehuffcode[TJEI_LUMA_DC],
1123 |                                      state->ehuffsize[TJEI_LUMA_AC], state->ehuffcode[TJEI_LUMA_AC],
1124 |                                      &pred_y, &bitbuffer, &location);
1125 |             tjei_encode_and_write_MCU(state, du_b,
1126 | #if TJE_USE_FAST_DCT
1127 |                                      pqt.chroma,
1128 | #else
1129 |                                      state->qt_chroma,
1130 | #endif
1131 |                                      state->ehuffsize[TJEI_CHROMA_DC], state->ehuffcode[TJEI_CHROMA_DC],
1132 |                                      state->ehuffsize[TJEI_CHROMA_AC], state->ehuffcode[TJEI_CHROMA_AC],
1133 |                                      &pred_b, &bitbuffer, &location);
1134 |             tjei_encode_and_write_MCU(state, du_r,
1135 | #if TJE_USE_FAST_DCT
1136 |                                      pqt.chroma,
1137 | #else
1138 |                                      state->qt_chroma,
1139 | #endif
1140 |                                      state->ehuffsize[TJEI_CHROMA_DC], state->ehuffcode[TJEI_CHROMA_DC],
1141 |                                      state->ehuffsize[TJEI_CHROMA_AC], state->ehuffcode[TJEI_CHROMA_AC],
1142 |                                      &pred_r, &bitbuffer, &location);
1143 | 
1144 | 
1145 |         }
1146 |     }
1147 | 
1148 |     // Finish the image.
1149 |     { // Flush
1150 |         if (location > 0 && location < 8) {
1151 |             tjei_write_bits(state, &bitbuffer, &location, (uint16_t)(8 - location), 0);
1152 |         }
1153 |     }
1154 |     uint16_t EOI = tjei_be_word(0xffd9);
1155 |     tjei_write(state, &EOI, sizeof(uint16_t), 1);
1156 | 
1157 |     if (state->output_buffer_count) {
1158 |         state->write_context.func(state->write_context.context, state->output_buffer, (int)state->output_buffer_count);
1159 |         state->output_buffer_count = 0;
1160 |     }
1161 | 
1162 |     return 1;
1163 | }
1164 | 
1165 | 
// File handle that receives the encoded JPEG bytes. It is opened and closed
// by tje_encode_to_file_at_quality() and written to by tjei_stdlib_func().
fs::File jpegFile;
1167 | 
1168 | int tje_encode_to_file(const char* dest_path,
1169 |                        const int width,
1170 |                        const int height,
1171 |                        const int num_components,
1172 |                        const unsigned char* src_data) {
1173 |     int res = tje_encode_to_file_at_quality(dest_path, 2, width, height, num_components, src_data);
1174 |     return res;
1175 | }
1176 | 
1177 | static void tjei_stdlib_func(void* context, void* data, int size) {
1178 |     jpegFile.write((uint8_t*)data, size);
1179 | }
1180 | 
1181 | // Define public interface.
1182 | int tje_encode_to_file_at_quality(const char* dest_path,
1183 |                                   const int quality,
1184 |                                   const int width,
1185 |                                   const int height,
1186 |                                   const int num_components,
1187 |                                   const unsigned char* src_data) {
1188 |     FILE* fd;
1189 |     jpegFile = SD.open(dest_path, FILE_WRITE);
1190 |     if (!jpegFile) {
1191 |         tje_log("Could not open file for writing.");
1192 |         return 0;
1193 |     }
1194 |     int result = tje_encode_with_func(tjei_stdlib_func, fd, quality, width, height, num_components, src_data);
1195 |     result |= 0 == fclose(fd);
1196 |     jpegFile.close();
1197 |     return result;
1198 | }
1199 | 
1200 | int tje_encode_with_func(tje_write_func* func,
1201 |                          void* context,
1202 |                          const int quality,
1203 |                          const int width,
1204 |                          const int height,
1205 |                          const int num_components,
1206 |                          const unsigned char* src_data) {
1207 |     if (quality < 1 || quality > 3) {
1208 |         tje_log("[ERROR] -- Valid 'quality' values are 1 (lowest), 2, or 3 (highest)\n");
1209 |         return 0;
1210 |     }
1211 | 
1212 |     TJEState state = { 0 };
1213 | 
1214 |     uint8_t qt_factor = 1;
1215 |     switch(quality) {
1216 |     case 3:
1217 |         for ( int i = 0; i < 64; ++i ) {
1218 |             state.qt_luma[i]   = 1;
1219 |             state.qt_chroma[i] = 1;
1220 |         }
1221 |         break;
1222 |     case 2:
1223 |         qt_factor = 10;
1224 |         // don't break. fall through.
1225 |     case 1:
1226 |         for ( int i = 0; i < 64; ++i ) {
1227 |             state.qt_luma[i]   = tjei_default_qt_luma_from_spec[i] / qt_factor;
1228 |             if (state.qt_luma[i] == 0) {
1229 |                 state.qt_luma[i] = 1;
1230 |             }
1231 |             state.qt_chroma[i] = tjei_default_qt_chroma_from_paper[i] / qt_factor;
1232 |             if (state.qt_chroma[i] == 0) {
1233 |                 state.qt_chroma[i] = 1;
1234 |             }
1235 |         }
1236 |         break;
1237 |     default:
1238 |         assert(!"invalid code path");
1239 |         break;
1240 |     }
1241 | 
1242 |     TJEWriteContext wc = { 0 };
1243 | 
1244 |     wc.context = context;
1245 |     wc.func = func;
1246 | 
1247 |     state.write_context = wc;
1248 | 
1249 | 
1250 |     tjei_huff_expand(&state);
1251 | 
1252 |     int result = tjei_encode_main(&state, src_data, width, height, num_components);
1253 | 
1254 |     return result;
1255 | }
1256 | // ============================================================
1257 | #endif // TJE_IMPLEMENTATION
1258 | // ============================================================
1259 | //
1260 | #if defined(__GNUC__) || defined(__clang__)
1261 | #pragma GCC diagnostic pop
1262 | #endif
1263 | 
1264 | 
1265 | #ifdef __cplusplus
1266 | }  // extern C
1267 | #endif
1268 | 


--------------------------------------------------------------------------------
/tinyraytracer.h:
--------------------------------------------------------------------------------
  1 | // modified from https://github.com/ssloy/tinyraytracer
  2 | #ifndef _TINYRAYTRACER
  3 | #define _TINYRAYTRACER
  4 | 
  5 | #include <limits>
  6 | #include <cmath>
  7 | #include <iostream>
  8 | #include <fstream>
  9 | #include <vector>
 10 | #include "geometry.h"
 11 | 
// Frame buffers allocated by tinyRayTracerInit(): 320*240 pixels when PSRAM
// is available, 128*128 pixels otherwise.
uint16_t *imgBuffer = NULL; // 16 bits per pixel, for screen capture; sized for a whole frame, not a single scan line
uint8_t  *rgbBuffer = NULL; // 3 bytes per pixel; filled by render()
 14 | 
 15 | void tinyRayTracerInit() {
 16 |   // TODO : move this to tinytracer.h
 17 |   if( hasPsram ) {
 18 |     imgBuffer = (uint16_t*)ps_calloc( 320*240, sizeof( uint16_t ) );
 19 |     rgbBuffer = (uint8_t*)ps_calloc( 320*240*3, sizeof( uint8_t ) );
 20 |   } else {
 21 |     imgBuffer = (uint16_t*)calloc( 128*128, sizeof( uint16_t ) );
 22 |     rgbBuffer = (uint8_t*)calloc( 128*128*3, sizeof( uint8_t ) );
 23 |   }
 24 | }
 25 | 
 26 | 
 27 | struct Light {
 28 |     Light(const Vec3f &p, const float &i) : position(p), intensity(i) {}
 29 |     Vec3f position;
 30 |     float intensity;
 31 | };
 32 | 
 33 | struct Material {
 34 |     Material(const float &r, const Vec4f &a, const Vec3f &color, const float &spec) : refractive_index(r), albedo(a), diffuse_color(color), specular_exponent(spec) {}
 35 |     Material() : refractive_index(1), albedo(1,0,0,0), diffuse_color(), specular_exponent() {}
 36 |     float refractive_index;
 37 |     Vec4f albedo;
 38 |     Vec3f diffuse_color;
 39 |     float specular_exponent;
 40 | };
 41 | 
 42 | struct Sphere {
 43 |     Vec3f center;
 44 |     float radius;
 45 |     Material material;
 46 | 
 47 |     Sphere(const Vec3f &c, const float &r, const Material &m) : center(c), radius(r), material(m) {}
 48 | 
 49 |     bool ray_intersect(const Vec3f &orig, const Vec3f &dir, float &t0) const {
 50 |         Vec3f L = center - orig;
 51 |         float tca = L*dir;
 52 |         float d2 = L*L - tca*tca;
 53 |         if (d2 > radius*radius) return false;
 54 |         float thc = sqrtf(radius*radius - d2);
 55 |         t0       = tca - thc;
 56 |         float t1 = tca + thc;
 57 |         if (t0 < 0) t0 = t1;
 58 |         if (t0 < 0) return false;
 59 |         return true;
 60 |     }
 61 | };
 62 | 
 63 | Vec3f reflect(const Vec3f &I, const Vec3f &N) {
 64 |     return I - N*2.f*(I*N);
 65 | }
 66 | 
 67 | Vec3f refract(const Vec3f &I, const Vec3f &N, const float &refractive_index) { // Snell's law
 68 |     float cosi = - std::max(-1.f, std::min(1.f, I*N));
 69 |     float etai = 1, etat = refractive_index;
 70 |     Vec3f n = N;
 71 |     if (cosi < 0) { // if the ray is inside the object, swap the indices and invert the normal to get the correct result
 72 |         cosi = -cosi;
 73 |         std::swap(etai, etat); n = -N;
 74 |     }
 75 |     float eta = etai / etat;
 76 |     float k = 1 - eta*eta*(1 - cosi*cosi);
 77 |     return k < 0 ? Vec3f(0,0,0) : I*eta + n*(eta * cosi - sqrtf(k));
 78 | }
 79 | 
 80 | bool scene_intersect(const Vec3f &orig, const Vec3f &dir, const std::vector<Sphere> &spheres, Vec3f &hit, Vec3f &N, Material &material) {
 81 |     float spheres_dist = std::numeric_limits<float>::max();
 82 |     for (size_t i=0; i < spheres.size(); i++) {
 83 |         float dist_i;
 84 |         if (spheres[i].ray_intersect(orig, dir, dist_i) && dist_i < spheres_dist) {
 85 |             spheres_dist = dist_i;
 86 |             hit = orig + dir*dist_i;
 87 |             N = (hit - spheres[i].center).normalize();
 88 |             material = spheres[i].material;
 89 |         }
 90 |     }
 91 | 
 92 |     float checkerboard_dist = std::numeric_limits<float>::max();
 93 |     if (fabs(dir.y)>1e-3)  {
 94 |         float d = -(orig.y+4)/dir.y; // the checkerboard plane has equation y = -4
 95 |         Vec3f pt = orig + dir*d;
 96 |         if (d>0 && fabs(pt.x)<10 && pt.z<-10 && pt.z>-30 && d<spheres_dist) {
 97 |             checkerboard_dist = d;
 98 |             hit = pt;
 99 |             N = Vec3f(0,1,0);
100 |             material.diffuse_color = (int(.5*hit.x+1000) + int(.5*hit.z)) & 1 ? Vec3f(1,1,1) : Vec3f(1, .7, .3);
101 |             material.diffuse_color = material.diffuse_color*.3;
102 |         }
103 |     }
104 |     return std::min(spheres_dist, checkerboard_dist)<1000;
105 | }
106 | 
107 | Vec3f cast_ray(const Vec3f &orig, const Vec3f &dir, const std::vector<Sphere> &spheres, const std::vector<Light> &lights, size_t depth=0) {
108 |     Vec3f point, N;
109 |     Material material;
110 | 
111 |     if (depth>4 || !scene_intersect(orig, dir, spheres, point, N, material)) {
112 |         return Vec3f(0.2, 0.7, 0.8); // background color
113 |     }
114 | 
115 |     Vec3f reflect_dir = reflect(dir, N).normalize();
116 |     Vec3f refract_dir = refract(dir, N, material.refractive_index).normalize();
117 |     Vec3f reflect_orig = reflect_dir*N < 0 ? point - N*1e-3 : point + N*1e-3; // offset the original point to avoid occlusion by the object itself
118 |     Vec3f refract_orig = refract_dir*N < 0 ? point - N*1e-3 : point + N*1e-3;
119 |     Vec3f reflect_color = cast_ray(reflect_orig, reflect_dir, spheres, lights, depth + 1);
120 |     Vec3f refract_color = cast_ray(refract_orig, refract_dir, spheres, lights, depth + 1);
121 | 
122 |     float diffuse_light_intensity = 0, specular_light_intensity = 0;
123 |     for (size_t i=0; i<lights.size(); i++) {
124 |         Vec3f light_dir      = (lights[i].position - point).normalize();
125 |         float light_distance = (lights[i].position - point).norm();
126 | 
127 |         Vec3f shadow_orig = light_dir*N < 0 ? point - N*1e-3 : point + N*1e-3; // checking if the point lies in the shadow of the lights[i]
128 |         Vec3f shadow_pt, shadow_N;
129 |         Material tmpmaterial;
130 |         if (scene_intersect(shadow_orig, light_dir, spheres, shadow_pt, shadow_N, tmpmaterial) && (shadow_pt-shadow_orig).norm() < light_distance)
131 |             continue;
132 | 
133 |         diffuse_light_intensity  += lights[i].intensity * std::max(0.f, light_dir*N);
134 |         specular_light_intensity += powf(std::max(0.f, -reflect(-light_dir, N)*dir), material.specular_exponent)*lights[i].intensity;
135 |     }
136 |     return material.diffuse_color * diffuse_light_intensity * material.albedo[0] + Vec3f(1., 1., 1.)*specular_light_intensity * material.albedo[1] + reflect_color*material.albedo[2] + refract_color*material.albedo[3];
137 | }
138 | 
139 | void render(uint16_t posx, uint16_t posy, uint16_t width, uint16_t height, const std::vector<Sphere> &spheres, const std::vector<Light> &lights, float fov=M_PI/2) {
140 |   // yay ! thanks to @atanisoft https://gitter.im/espressif/arduino-esp32?at=5c474edc8ce4bb25b8f1ed95
141 |   uint32_t pos = 0;
142 |   for (size_t j = 0; j<height; j++) {
143 |     for (size_t i = 0; i<width; i++) {
144 |       float x =  (2*(i + 0.5)/(float)width  - 1)*tan(fov/2.)*width/(float)height;
145 |       float y = -(2*(j + 0.5)/(float)height - 1)*tan(fov/2.);
146 |       Vec3f dir = Vec3f(x, y, -1).normalize();
147 |       Vec3f pixelbuffer = cast_ray(Vec3f(0,0,0), dir, spheres, lights);
148 |       Vec3f &c = pixelbuffer;
149 |       float max = std::max(c[0], std::max(c[1], c[2]));
150 |       if (max>1) c = c*(1./max);
151 |       char r = (char)(255 * std::max(0.f, std::min(1.f, pixelbuffer[0])));
152 |       char g = (char)(255 * std::max(0.f, std::min(1.f, pixelbuffer[1])));
153 |       char b = (char)(255 * std::max(0.f, std::min(1.f, pixelbuffer[2])));
154 | 
155 |       rgbBuffer[++pos] = (uint8_t)g;
156 |       rgbBuffer[++pos] = (uint8_t)b;
157 |       rgbBuffer[++pos] = (uint8_t)r;
158 | 
159 |       //uint16_t pixelcolor = tft.color565(r, g, b);
160 |       //tft.drawPixel(i+posx, j+posy, pixelcolor);
161 |     }
162 |   }
163 |   //Serial.printf("Render: pos: %d, factor: %d, size: %d\n", pos, height*width*3 , height*width );
164 | }
165 | 
166 | 
167 | #endif
168 | 


--------------------------------------------------------------------------------