├── .gitignore ├── .gitmodules ├── LICENSE ├── README.md ├── picpac3d.cpp └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files 2 | *.slo 3 | *.lo 4 | *.o 5 | *.obj 6 | 7 | # Precompiled Headers 8 | *.gch 9 | *.pch 10 | 11 | # Compiled Dynamic libraries 12 | *.so 13 | *.dylib 14 | *.dll 15 | 16 | # Fortran module files 17 | *.mod 18 | *.smod 19 | 20 | # Compiled Static libraries 21 | *.lai 22 | *.la 23 | *.a 24 | *.lib 25 | 26 | # Executables 27 | *.exe 28 | *.out 29 | *.app 30 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "picpac"] 2 | path = picpac 3 | url = http://github.com/aaalgo/picpac 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU LESSER GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | 9 | This version of the GNU Lesser General Public License incorporates 10 | the terms and conditions of version 3 of the GNU General Public 11 | License, supplemented by the additional permissions listed below. 12 | 13 | 0. Additional Definitions. 14 | 15 | As used herein, "this License" refers to version 3 of the GNU Lesser 16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU 17 | General Public License. 18 | 19 | "The Library" refers to a covered work governed by this License, 20 | other than an Application or a Combined Work as defined below. 21 | 22 | An "Application" is any work that makes use of an interface provided 23 | by the Library, but which is not otherwise based on the Library. 
24 | Defining a subclass of a class defined by the Library is deemed a mode 25 | of using an interface provided by the Library. 26 | 27 | A "Combined Work" is a work produced by combining or linking an 28 | Application with the Library. The particular version of the Library 29 | with which the Combined Work was made is also called the "Linked 30 | Version". 31 | 32 | The "Minimal Corresponding Source" for a Combined Work means the 33 | Corresponding Source for the Combined Work, excluding any source code 34 | for portions of the Combined Work that, considered in isolation, are 35 | based on the Application, and not on the Linked Version. 36 | 37 | The "Corresponding Application Code" for a Combined Work means the 38 | object code and/or source code for the Application, including any data 39 | and utility programs needed for reproducing the Combined Work from the 40 | Application, but excluding the System Libraries of the Combined Work. 41 | 42 | 1. Exception to Section 3 of the GNU GPL. 43 | 44 | You may convey a covered work under sections 3 and 4 of this License 45 | without being bound by section 3 of the GNU GPL. 46 | 47 | 2. Conveying Modified Versions. 48 | 49 | If you modify a copy of the Library, and, in your modifications, a 50 | facility refers to a function or data to be supplied by an Application 51 | that uses the facility (other than as an argument passed when the 52 | facility is invoked), then you may convey a copy of the modified 53 | version: 54 | 55 | a) under this License, provided that you make a good faith effort to 56 | ensure that, in the event an Application does not supply the 57 | function or data, the facility still operates, and performs 58 | whatever part of its purpose remains meaningful, or 59 | 60 | b) under the GNU GPL, with none of the additional permissions of 61 | this License applicable to that copy. 62 | 63 | 3. Object Code Incorporating Material from Library Header Files. 
64 | 65 | The object code form of an Application may incorporate material from 66 | a header file that is part of the Library. You may convey such object 67 | code under terms of your choice, provided that, if the incorporated 68 | material is not limited to numerical parameters, data structure 69 | layouts and accessors, or small macros, inline functions and templates 70 | (ten or fewer lines in length), you do both of the following: 71 | 72 | a) Give prominent notice with each copy of the object code that the 73 | Library is used in it and that the Library and its use are 74 | covered by this License. 75 | 76 | b) Accompany the object code with a copy of the GNU GPL and this license 77 | document. 78 | 79 | 4. Combined Works. 80 | 81 | You may convey a Combined Work under terms of your choice that, 82 | taken together, effectively do not restrict modification of the 83 | portions of the Library contained in the Combined Work and reverse 84 | engineering for debugging such modifications, if you also do each of 85 | the following: 86 | 87 | a) Give prominent notice with each copy of the Combined Work that 88 | the Library is used in it and that the Library and its use are 89 | covered by this License. 90 | 91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license 92 | document. 93 | 94 | c) For a Combined Work that displays copyright notices during 95 | execution, include the copyright notice for the Library among 96 | these notices, as well as a reference directing the user to the 97 | copies of the GNU GPL and this license document. 
98 | 99 | d) Do one of the following: 100 | 101 | 0) Convey the Minimal Corresponding Source under the terms of this 102 | License, and the Corresponding Application Code in a form 103 | suitable for, and under terms that permit, the user to 104 | recombine or relink the Application with a modified version of 105 | the Linked Version to produce a modified Combined Work, in the 106 | manner specified by section 6 of the GNU GPL for conveying 107 | Corresponding Source. 108 | 109 | 1) Use a suitable shared library mechanism for linking with the 110 | Library. A suitable mechanism is one that (a) uses at run time 111 | a copy of the Library already present on the user's computer 112 | system, and (b) will operate properly with a modified version 113 | of the Library that is interface-compatible with the Linked 114 | Version. 115 | 116 | e) Provide Installation Information, but only if you would otherwise 117 | be required to provide such information under section 6 of the 118 | GNU GPL, and only to the extent that such information is 119 | necessary to install and execute a modified version of the 120 | Combined Work produced by recombining or relinking the 121 | Application with a modified version of the Linked Version. (If 122 | you use option 4d0, the Installation Information must accompany 123 | the Minimal Corresponding Source and Corresponding Application 124 | Code. If you use option 4d1, you must provide the Installation 125 | Information in the manner specified by section 6 of the GNU GPL 126 | for conveying Corresponding Source.) 127 | 128 | 5. Combined Libraries. 
129 | 130 | You may place library facilities that are a work based on the 131 | Library side by side in a single library together with other library 132 | facilities that are not Applications and are not covered by this 133 | License, and convey such a combined library under terms of your 134 | choice, if you do both of the following: 135 | 136 | a) Accompany the combined library with a copy of the same work based 137 | on the Library, uncombined with any other library facilities, 138 | conveyed under the terms of this License. 139 | 140 | b) Give prominent notice with the combined library that part of it 141 | is a work based on the Library, and explaining where to find the 142 | accompanying uncombined form of the same work. 143 | 144 | 6. Revised Versions of the GNU Lesser General Public License. 145 | 146 | The Free Software Foundation may publish revised and/or new versions 147 | of the GNU Lesser General Public License from time to time. Such new 148 | versions will be similar in spirit to the present version, but may 149 | differ in detail to address new problems or concerns. 150 | 151 | Each version is given a distinguishing version number. If the 152 | Library as you received it specifies that a certain numbered version 153 | of the GNU Lesser General Public License "or any later version" 154 | applies to it, you have the option of following the terms and 155 | conditions either of that published version or of any later version 156 | published by the Free Software Foundation. If the Library as you 157 | received it does not specify a version number of the GNU Lesser 158 | General Public License, you may choose any version of the GNU Lesser 159 | General Public License ever published by the Free Software Foundation. 
160 | 161 | If the Library as you received it specifies that a proxy can decide 162 | whether future versions of the GNU Lesser General Public License shall 163 | apply, that proxy's public statement of acceptance of any version is 164 | permanent authorization for you to choose that version for the 165 | Library. 166 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # picpac3d 2 | 3 | This package is for sampling and streaming volumetric data for 4 | training 3D convolutional neural networks. It is designed as 5 | part of my solution for Data Science Bowl 2017. 6 | 7 | Highlights: 8 | - Lossless storing 8-bit volumetric data with H265. 9 | - On-the-fly region sampling and augmentation with random rotation and 10 | scaling using OpenGL 3D texture. 11 | 12 | 13 | # Building on Ubuntu 16.04 14 | 15 | Install libraries 16 | ``` 17 | apt-get install libboost-all-dev libopencv-dev libglog-dev 18 | apt-get install libgl1-mesa-dev libglew-dev libglfw3-dev libglm-dev 19 | apt-get install libx265-dev libde265-dev 20 | ``` 21 | 22 | Then compile with 23 | ``` 24 | git submodule update --init --recursive 25 | python setup.py build 26 | sudo python setup.py install 27 | ``` 28 | 29 | # Tips 30 | 31 | ## Cube Stream 32 | ``` 33 | threads = 1 # PicPac preload threads, must be 1. 34 | decode_threads = 4 # H265 decoding threads. 35 | preload = 256 # 36 | samples0 = 48 37 | samples1 = 48 38 | pool = 4096 # 39 | ``` 40 | 41 | For a core i7 2600k to sustain 50 sample/second, each volume must generate 42 | about 100 examples (samples0 = samples1= 48). If network trains 43 | slower than that, samples per volume can be lowered accordingly 44 | to improve variety. 45 | To avoid feeding the network samples from the same volume continuously, 46 | a global pool of samples is maintained. 
All samples from a newly 47 | loaded volume are first added to the pool, and samples are 48 | randomly sampled from the pool for feeding into the network. 49 | With the above configuration, the pool contains samples from 50 | 4096/(48+48) = 42 volumes. That is within the window of each 51 | 4096 samples, the samples are from about 42 volumes (actually more 52 | than that, as samples are randomly drawn from the pool, a particular 53 | lucky (unlucky) sample of a volume might linger in the pool for a 54 | long time, increasing the volume-variety of the pool). 55 | 56 | Because of hardware limitation, the library currently only supports 57 | the following size configuration: 58 | 59 | - Input volume must be about 512x512x512. It doesn't have to be 60 | exactly this. The library will automatically clip and pad the data. 61 | - Each sample is of size 64x64x64. 62 | - PicPac3D only supports annotations in the shape of 63 | balls (x, y, z, r). 64 | 65 | The library does the following data augmentation: 66 | - For background cubes, a sample is randomly sampled from the whole volume. 67 | The library does not guarantee that a negative cube doesn't cover 68 | any annotation balls, but if it does, all corresponding voxels in the 69 | label cube are properly set to 1. 70 | - Each positive cube roughly centers around an annotation ball, with the 71 | ball center randomly shifted by [-r, r] along each axis. 72 | - Scale = 1 means that a cube covers 64x64x64 of the original volume. 73 | By setting scale=0.5, the output cube size is still 64x64x64, but 74 | it covers 128x128x128 of the original volume, with more data but 75 | less resolution. Scale is randomly sampled from the range of 76 | min_pert_scale and max_pert_scale. 77 | - A random direction is picked uniformly from a 3D unit sphere, and 78 | the cube is randomly rotated by a number of degrees randomly 79 | sampled from [-pert_angles, pert_angles]. 
80 | - A random delta of color value from [-pert_color1, pert_color1] is 81 | added to the cube. 82 | 83 | Usage example with Tensorflow: 84 | ```python 85 | import picpac3d 86 | ... 87 | picpac_config = dict(seed=2017, 88 | # most are just PicPac configurations. 89 | threads=1, 90 | decode_threads=4, # decoding threads 91 | preload=512, 92 | cache=False, 93 | shuffle=True, 94 | reshuffle=True, 95 | stratify=True, # stratify sampling of volumes by label 96 | channels=1, 97 | pert_color1=20, # randomly +/- 0~20 to color 98 | # for Kaggle/Luna data, corresponds to about 2 mm^3/voxel. 99 | pert_min_scale=0.45, 100 | pert_max_scale=0.55, 101 | # are we rotating too much? 102 | pert_angle = 180, # in degrees; is that too much? 103 | samples0 = 32, # sample 32 negative cubes 104 | samples1 = 64, # and 64 positive cubes from each volume 105 | pool = 4096 106 | ) 107 | 108 | stream = picpac3d.CubeStream(FLAGS.db_path, perturb=True, loop=True, **picpac_config) 109 | ... 110 | with tf.Session() as sess: 111 | 112 | for _ in range(FLAGS.maximal_training_steps): 113 | images, labels = stream.next() 114 | # image is 64x64x64 of 0-255. 115 | # labels is 64x64x64 of 0 or 1, for training FCN-style networks 116 | feed_dict = {X: images, Y: labels} 117 | mm, _, summaries = sess.run([metrics, train_op, train_summaries], feed_dict=feed_dict) 118 | ``` 119 | 120 | Before streaming, volumes (numpy 3-D) arrays must be first imported into 121 | a database. 
122 | ```python 123 | import simplejson as json 124 | import picpac3d 125 | 126 | db = picpac3d.Writer(db_path) 127 | for label, uid in all_cases: 128 | images = load_3d_array_of_type_uint8_of_roughly_512x512x512(uid) 129 | # 3D-scaling lungs to 0.8 mm^3/voxel will be about the proper size 130 | assert len(images.shape) == 3 131 | assert images.dtype == np.uint8 132 | buf1 = picpac3d.encode(images) # buf1 is h265 133 | Z, Y, X = images.shape 134 | meta = { 135 | 'size': [Z, Y, X] # add size for efficient memory 136 | # allocation when decoding volume 137 | } 138 | if has_annotation(uid): # add optional annotations 139 | balls = [] 140 | for nodule in load_annotation(uid): 141 | (z, y, x, r) = nodule 142 | balls.append({'type':'ball', 143 | 'x': x, # 0 <= x < X, same for y, z 144 | 'y': y, # x, y, z are all in pixels 145 | 'z': z, # but can be float for better 146 | 'r': r}) # accuracy 147 | pass 148 | meta['shapes'] = balls 149 | pass 150 | buf2 = json.dumps(meta) 151 | # label of 0/1 is for stratified sampling volumes 152 | # and the meta data in buf2 will produce the actual cube labels 153 | db.append(label, buf1, buf2) 154 | pass 155 | pass 156 | ``` 157 | -------------------------------------------------------------------------------- /picpac3d.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include "json11.hpp" 25 | 26 | using std::cin; 27 | using std::cout; 28 | using std::cerr; 29 | using std::endl; 30 | using std::vector; 31 | using std::string; 32 | using std::ostringstream; 33 | using std::runtime_error; 34 | using boost::asio::const_buffer; 35 | using namespace boost::python; 36 | using namespace json11; 37 | 38 
| string const VERTEX_SHADER = R"gl( 39 | #version 330 core 40 | layout(location = 0) in vec2 pos_in; 41 | layout(location = 1) in vec3 tex_in; 42 | out vec3 tex; 43 | uniform mat4 mvp; 44 | void main(){ 45 | gl_Position = vec4(pos_in,0,1); 46 | tex = (mvp * vec4(tex_in, 1)).xyz; 47 | } 48 | )gl"; 49 | 50 | string const FRAGMENT_SHADER = R"gl( 51 | #version 330 core 52 | in vec3 tex; 53 | //layout(location=0) 54 | out float color; 55 | uniform sampler3D sampler; 56 | void main(){ 57 | color = texture(sampler, tex).r; 58 | } 59 | )gl"; 60 | 61 | struct Tensor3 { 62 | typedef uint8_t T; 63 | bool own; 64 | char *data; 65 | npy_intp dimensions[3]; 66 | npy_intp strides[3]; 67 | float label; 68 | Tensor3 (int Z, int Y, int X, bool zero = false): own(true), label(0) { 69 | if (zero) { 70 | data = (char *)calloc(Z * Y * X, sizeof(T)); 71 | } 72 | else { 73 | data = (char *)malloc(Z * Y * X * sizeof(T)); 74 | } 75 | dimensions[0] = Z; 76 | dimensions[1] = Y; 77 | dimensions[2] = X; 78 | strides[0] = Y * X * sizeof(T); 79 | strides[1] = X * sizeof(T); 80 | strides[2] = sizeof(T); 81 | } 82 | Tensor3 (PyObject *_array): own(false) { 83 | PyArrayObject *array = (PyArrayObject *)_array; 84 | CHECK(array->nd == 3); 85 | dimensions[0] = array->dimensions[0]; 86 | dimensions[1] = array->dimensions[1]; 87 | dimensions[2] = array->dimensions[2]; 88 | strides[0] = array->strides[0]; 89 | strides[1] = array->strides[1]; 90 | strides[2] = array->strides[2]; 91 | data = array->data; 92 | } 93 | ~Tensor3 () { 94 | if (own && data) free(data); 95 | } 96 | object to_npy_and_delete () { 97 | CHECK(own); 98 | PyObject *array = PyArray_SimpleNewFromData(3, dimensions, NPY_UINT8, data); 99 | PyArrayObject *a = (PyArrayObject *)array; 100 | a->flags |= NPY_OWNDATA; 101 | data = 0; 102 | delete this; 103 | return object(boost::python::handle<>(array)); 104 | } 105 | }; 106 | 107 | GLuint LoadShader (GLenum shaderType, string const &buf) { 108 | GLuint ShaderID = glCreateShader(shaderType); 109 
| GLint Result = GL_FALSE; 110 | int InfoLogLength; 111 | char const *ptr = buf.c_str(); 112 | glShaderSource(ShaderID, 1, &ptr , NULL); 113 | glCompileShader(ShaderID); 114 | // Check Vertex Shader 115 | glGetShaderiv(ShaderID, GL_COMPILE_STATUS, &Result); 116 | glGetShaderiv(ShaderID, GL_INFO_LOG_LENGTH, &InfoLogLength); 117 | if (InfoLogLength > 0){ 118 | string msg; 119 | msg.resize(InfoLogLength+1); 120 | glGetShaderInfoLog(ShaderID, InfoLogLength, NULL, &msg[0]); 121 | LOG(WARNING) << msg; 122 | } 123 | CHECK(Result); 124 | return ShaderID; 125 | } 126 | 127 | GLuint LoadProgram (string const &vshader, string const &fshader) { 128 | // Create the shaders 129 | GLuint VertexShaderID = LoadShader(GL_VERTEX_SHADER, vshader); 130 | GLuint FragmentShaderID = LoadShader(GL_FRAGMENT_SHADER, fshader); 131 | // Link the program 132 | LOG(INFO) << "Linking program"; 133 | GLuint program = glCreateProgram(); 134 | glAttachShader(program, VertexShaderID); 135 | glAttachShader(program, FragmentShaderID); 136 | glLinkProgram(program); 137 | 138 | GLint Result = GL_FALSE; 139 | int InfoLogLength; 140 | // Check the program 141 | glGetProgramiv(program, GL_LINK_STATUS, &Result); 142 | CHECK(Result); 143 | glGetProgramiv(program, GL_INFO_LOG_LENGTH, &InfoLogLength); 144 | if ( InfoLogLength > 0 ){ 145 | string msg; 146 | msg.resize(InfoLogLength+1); 147 | glGetProgramInfoLog(program, InfoLogLength, NULL, &msg[0]); 148 | LOG(WARNING) << msg; 149 | } 150 | 151 | glDetachShader(program, VertexShaderID); 152 | glDetachShader(program, FragmentShaderID); 153 | 154 | glDeleteShader(VertexShaderID); 155 | glDeleteShader(FragmentShaderID); 156 | 157 | return program; 158 | } 159 | 160 | #define CHECK_POWER_OF_TWO(x) CHECK(((x)&((x)-1)) == 0) 161 | 162 | class Sampler { 163 | GLFWwindow* window; 164 | GLuint program; 165 | GLuint sampler; 166 | GLuint itexture, otexture; 167 | GLuint framebuffer; 168 | GLuint v_pos, v_tex, v_array; 169 | GLuint matrix; 170 | std::thread::id thread_id; 
171 | 172 | void check_thread () { 173 | if (thread_id != std::this_thread::get_id()) { 174 | LOG(ERROR) << "Cross thread rendering is not working!"; 175 | CHECK(false); 176 | } 177 | } 178 | public: 179 | static constexpr int CUBE_SIZE = 64; 180 | static constexpr int VOLUME_SIZE = 512; 181 | static constexpr int VIEW_SIZE = 512; 182 | static constexpr int VIEW_PIXELS = VIEW_SIZE * VIEW_SIZE; 183 | Sampler (): thread_id(std::this_thread::get_id()) { 184 | LOG(WARNING) << "Constructing sampler"; 185 | CHECK_POWER_OF_TWO(CUBE_SIZE); 186 | CHECK_POWER_OF_TWO(VOLUME_SIZE); 187 | CHECK_POWER_OF_TWO(VIEW_SIZE); 188 | CHECK(CUBE_SIZE * CUBE_SIZE * CUBE_SIZE == VIEW_SIZE * VIEW_SIZE); 189 | if(!glfwInit()) CHECK(false) << "Failed to initialize GLFW"; 190 | glfwWindowHint(GLFW_SAMPLES, 4); 191 | glfwWindowHint(GLFW_VISIBLE, GL_FALSE); 192 | glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3); 193 | glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 3); 194 | glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE); // To make MacOS happy; should not be needed 195 | glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE); 196 | // we are not going to use the window and will render to texture, so size doesn't matter 197 | window = glfwCreateWindow(32, 32, "", NULL, NULL); 198 | CHECK(window) << "Failed to open GLFW window"; 199 | glfwMakeContextCurrent(window); 200 | // Initialize GLEW 201 | glewExperimental = true; // Needed for core profile 202 | if (glewInit() != GLEW_OK) { 203 | CHECK(false) << "Failed to initialize GLEW"; 204 | } 205 | glEnable(GL_TEXTURE_2D); 206 | glEnable(GL_TEXTURE_3D); 207 | glClearColor(0.0f, 0.0f, 0.0f, 0.0f); 208 | 209 | program = LoadProgram(VERTEX_SHADER, FRAGMENT_SHADER); 210 | sampler = glGetUniformLocation(program, "sampler"); 211 | matrix = glGetUniformLocation(program, "mvp"); 212 | 213 | vector pos; 214 | vector tex; 215 | 216 | int o = 0; 217 | for (int i = 0; i < CUBE_SIZE; ++i) { 218 | for (int j = 0; j < CUBE_SIZE; ++j) { 219 | for (int k = 0; k 
< CUBE_SIZE; ++k) { 220 | pos.push_back(2.0*(o%VIEW_SIZE+1)/VIEW_SIZE-1); 221 | pos.push_back(2.0*(o/VIEW_SIZE+1)/VIEW_SIZE-1); 222 | tex.push_back(1.0 * k / CUBE_SIZE); 223 | tex.push_back(1.0 * j / CUBE_SIZE); 224 | tex.push_back(1.0 * i / CUBE_SIZE); 225 | ++o; 226 | } 227 | } 228 | } 229 | 230 | glGenVertexArrays(1, &v_array); 231 | glBindVertexArray(v_array); 232 | glEnableVertexAttribArray(0); 233 | glEnableVertexAttribArray(1); 234 | 235 | // output position doesn't change 236 | glGenBuffers(1, &v_pos); 237 | glBindBuffer(GL_ARRAY_BUFFER, v_pos); 238 | glBufferData(GL_ARRAY_BUFFER, sizeof(pos[0]) * pos.size(), &pos[0], GL_STATIC_DRAW); 239 | glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 0, 0); 240 | 241 | // texture sampler position doesn't change 242 | glGenBuffers(1, &v_tex); 243 | glBindBuffer(GL_ARRAY_BUFFER, v_tex); 244 | glBufferData(GL_ARRAY_BUFFER, sizeof(tex[0]) * tex.size(), &tex[0], GL_STATIC_DRAW); 245 | 246 | glVertexAttribPointer(1, 3, GL_FLOAT, GL_FALSE, 0, 0); 247 | 248 | glGenTextures(1, &otexture); 249 | glBindTexture(GL_TEXTURE_2D, otexture); 250 | glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); 251 | glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); 252 | glTexImage2D(GL_TEXTURE_2D, 0, GL_RED, VIEW_SIZE, VIEW_SIZE, 0, GL_RED, GL_UNSIGNED_BYTE, 0); 253 | 254 | glGenFramebuffers(1, &framebuffer); 255 | glBindFramebuffer(GL_FRAMEBUFFER, framebuffer); 256 | glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, otexture, 0); 257 | 258 | CHECK(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE); 259 | glViewport(0, 0, VIEW_SIZE, VIEW_SIZE); 260 | glUseProgram(program); 261 | 262 | glGenTextures(1, &itexture); 263 | glBindTexture(GL_TEXTURE_3D, itexture); 264 | glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); 265 | glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); 266 | } 267 | 268 | static void strip_pad_512 (int n, int *from, int *to, int *len, int 
*shift) { 269 | if (n > VOLUME_SIZE) { 270 | *from = (n - VOLUME_SIZE) / 2; 271 | *to = 0; 272 | *len = VOLUME_SIZE; 273 | *shift = *from; 274 | } 275 | else { 276 | *from = 0; 277 | *to = (VOLUME_SIZE - n)/2; 278 | *len = n; 279 | *shift = -*to; 280 | } 281 | } 282 | 283 | void texture_direct (PyObject *_array) { 284 | PyArrayObject *array = (PyArrayObject *)_array; 285 | check_thread(); 286 | CHECK(array->dimensions[0] == VOLUME_SIZE); 287 | CHECK(array->dimensions[1] == VOLUME_SIZE); 288 | CHECK(array->dimensions[2] == VOLUME_SIZE); 289 | CHECK(array->strides[0] = VOLUME_SIZE * VOLUME_SIZE); 290 | CHECK(array->strides[1] = VOLUME_SIZE); 291 | CHECK(array->strides[2] = 1); 292 | glTexImage3D(GL_TEXTURE_3D, 0, GL_RED, 293 | array->dimensions[2], 294 | array->dimensions[1], 295 | array->dimensions[0], 0, GL_RED, GL_UNSIGNED_BYTE, array->data); 296 | glUniform1i(sampler, 0); 297 | } 298 | 299 | void texture (Tensor3 *array, glm::ivec3 *off, glm::ivec3 *len, glm::ivec3 *shift) { 300 | check_thread(); 301 | glActiveTexture(GL_TEXTURE0); 302 | glBindTexture(GL_TEXTURE_3D, itexture); 303 | vector buf(VOLUME_SIZE * VOLUME_SIZE * VOLUME_SIZE, 0); 304 | // copy from array to continuous buf with padding & stripping 305 | int from_z, to_z, n_z, shift_z; 306 | int from_y, to_y, n_y, shift_y; 307 | int from_x, to_x, n_x, shift_x; 308 | 309 | strip_pad_512(array->dimensions[0], &from_z, &to_z, &n_z, &shift_z); 310 | strip_pad_512(array->dimensions[1], &from_y, &to_y, &n_y, &shift_y); 311 | strip_pad_512(array->dimensions[2], &from_x, &to_x, &n_x, &shift_x); 312 | 313 | *off = glm::ivec3(to_x, to_y, to_z); 314 | *len = glm::ivec3(n_x, n_y, n_z); 315 | *shift = glm::ivec3(shift_x, shift_y, shift_z); 316 | 317 | uint8_t const *from_z_ptr = (uint8_t const *)array->data + array->strides[0] * from_z; 318 | uint8_t *to_z_ptr = &buf[0] + (VOLUME_SIZE * VOLUME_SIZE) * to_z; 319 | for (int z = 0; z < n_z; ++z, from_z_ptr += array->strides[0], to_z_ptr += VOLUME_SIZE * VOLUME_SIZE) { 320 
| uint8_t const *from_y_ptr = from_z_ptr + array->strides[1] * from_y; 321 | uint8_t *to_y_ptr = to_z_ptr + VOLUME_SIZE * to_y; 322 | for (int y = 0; y < n_y; ++y, from_y_ptr += array->strides[1], to_y_ptr += VOLUME_SIZE) { 323 | uint8_t const *from_x_ptr = from_y_ptr + array->strides[2] * from_x; 324 | uint8_t *to_x_ptr = to_y_ptr + to_x; 325 | for (int x = 0; x < n_x; ++x, from_x_ptr += array->strides[2], ++to_x_ptr) { 326 | *to_x_ptr = *from_x_ptr; 327 | } 328 | } 329 | } 330 | #if 0 331 | static int cccc = 0; 332 | for (unsigned i = 0; i < VOLUME_SIZE; ++i) { 333 | ++cccc; 334 | char bufxx[BUFSIZ]; 335 | sprintf(bufxx, "zzz/%d.jpg", cccc); 336 | cv::imwrite(bufxx, cv::Mat(Sampler::VOLUME_SIZE, Sampler::VOLUME_SIZE, CV_8U, &buf[0] + i * VOLUME_SIZE * VOLUME_SIZE)); 337 | } 338 | #endif 339 | glTexImage3D(GL_TEXTURE_3D, 0, GL_RED, VOLUME_SIZE, VOLUME_SIZE, VOLUME_SIZE, 340 | 0, GL_RED, GL_UNSIGNED_BYTE, &buf[0]); 341 | glUniform1i(sampler, 0); 342 | } 343 | 344 | void texture_indirect (PyObject *array) { 345 | Tensor3 view(array); 346 | glm::ivec3 a, b, c; 347 | texture(&view, &a, &b, &c); 348 | cout << "OFF: " << a[0] << ' ' << a[1] << ' ' << a[2] << endl; 349 | cout << "LEN: " << b[0] << ' ' << b[1] << ' ' << b[2] << endl; 350 | cout << "SHI: " << c[0] << ' ' << c[1] << ' ' << c[2] << endl; 351 | } 352 | 353 | Tensor3 *sample (glm::vec3 center, glm::vec3 rotate, float scale0) { //std::default_random_engine &rng) { 354 | check_thread(); 355 | // 1 -> scale -> rotate -> shift 356 | float scale = 1.0 * CUBE_SIZE / VOLUME_SIZE / scale0; 357 | 358 | /* 359 | cout << "CENTER: " << center[0] << ' ' << center[1] << ' ' << center[2] << endl; 360 | cout << "ANGLE: " << rotate[0] << ' ' << rotate[1] << ' ' << rotate[2] << endl; 361 | cout << "SCALE: " << scale0 << endl; 362 | */ 363 | 364 | glm::mat4 mvp = glm::translate(float(1.0/VOLUME_SIZE) * center) * 365 | glm::scale(glm::vec3(scale, scale, scale)) * 366 | glm::rotate(float(rotate[2] * 180/M_PI), // glm requires 
digrees 367 | glm::vec3( 368 | sin(rotate[0]) * cos(rotate[1]), 369 | sin(rotate[0]) * sin(rotate[1]), 370 | cos(rotate[0]))) * 371 | glm::translate(glm::vec3(-0.5, -0.5, -0.5)); 372 | glUniformMatrix4fv(matrix, 1, GL_FALSE, &mvp[0][0]); 373 | 374 | glClear(GL_COLOR_BUFFER_BIT); 375 | glDrawArrays(GL_POINTS, 0, VIEW_PIXELS); 376 | 377 | glFinish(); 378 | 379 | Tensor3 *oarray = new Tensor3(CUBE_SIZE, CUBE_SIZE, CUBE_SIZE, true); 380 | CHECK(oarray); 381 | 382 | glBindTexture(GL_TEXTURE_2D, otexture); 383 | GLint v; 384 | glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_WIDTH, &v); 385 | CHECK(v == VIEW_SIZE); 386 | glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_HEIGHT, &v); 387 | CHECK(v == VIEW_SIZE); 388 | 389 | glGetTexImage(GL_TEXTURE_2D, 0, GL_RED, GL_UNSIGNED_BYTE, oarray->data); 390 | 391 | return oarray; 392 | } 393 | 394 | object sample_simple (float x, float y, float z, float phi, float theta, float kappa, float scale) { 395 | return sample(glm::vec3(x, y, z), glm::vec3(phi, theta, kappa), scale)->to_npy_and_delete(); 396 | } 397 | 398 | ~Sampler () { 399 | //check_thread(); 400 | glDisableVertexAttribArray(0); 401 | glDisableVertexAttribArray(1); 402 | glDeleteBuffers(1, &v_pos); 403 | glDeleteBuffers(1, &v_tex); 404 | glDeleteTextures(1, &itexture); 405 | glDeleteTextures(1, &otexture); 406 | glDeleteFramebuffers(1, &framebuffer); 407 | glDeleteVertexArrays(1, &v_array); 408 | glDeleteProgram(program); 409 | glfwTerminate(); 410 | } 411 | }; 412 | 413 | boost::shared_ptr globalSampler = 0; 414 | 415 | boost::shared_ptr get_sampler () { 416 | if (!globalSampler) { 417 | globalSampler = boost::shared_ptr(new Sampler()); 418 | } 419 | return globalSampler; 420 | } 421 | 422 | 423 | /* 424 | object render (PyObject *array, float z, float y, float x, float scale) { 425 | //Sampler renderer(64, 512, 512); 426 | glm::ivec3 off, len, shift; 427 | 428 | globalSampler->texture(array, &off, &len, &shift); 429 | std::default_random_engine rng; 430 | 
glm::vec3 center = off + glm::vec3(x, y, z); 431 | return renderer.sample(center, scale, rng); 432 | //return renderer.apply(array); 433 | } 434 | */ 435 | 436 | 437 | class H265Encoder { 438 | std::ostream &os; 439 | x265_encoder *enc; 440 | x265_picture *pic; 441 | vector framebuf; 442 | int rows, cols; 443 | 444 | void write_nal (x265_nal *p_nal, uint32_t i_nal) { 445 | for (unsigned i = 0; i < i_nal; ++i) { 446 | /* 447 | os.write(reinterpret_cast(&p_nal[i].type), sizeof(p_nal[i].type)); 448 | os.write(reinterpret_cast(&p_nal[i].sizeBytes), sizeof(p_nal[i].sizeBytes)); 449 | */ 450 | os.write(reinterpret_cast(p_nal[i].payload), p_nal[i].sizeBytes); 451 | } 452 | } 453 | public: 454 | H265Encoder (std::ostream &_os, int _rows, int _cols, int frames = 0): os(_os), rows(_rows), cols(_cols) { 455 | char buf[200]; 456 | int r; 457 | x265_param *param = x265_param_alloc(); 458 | CHECK(param); 459 | //r = x265_param_default_preset(param, "medium", "fastdecode"); 460 | r = x265_param_default_preset(param, "medium", "fastdecode"); 461 | CHECK(r == 0); 462 | r = x265_param_apply_profile(param, "main"); 463 | CHECK(r == 0); 464 | r = x265_param_parse(param, "lossless", NULL); 465 | CHECK(r == 0); 466 | /* 467 | if (frames > 0) { 468 | sprintf(buf, "%d", frames); 469 | r = x265_param_parse(param, "frames", buf); 470 | CHECK(r == 0); 471 | } 472 | */ 473 | CHECK(param->internalBitDepth == 8); 474 | /* 475 | r = x265_param_parse(param, "input-depth", "8"); 476 | CHECK(r == 0); 477 | */ 478 | sprintf(buf, "%dx%d", cols, rows); 479 | r = x265_param_parse(param, "input-res", buf); 480 | CHECK(r == 0); 481 | r = x265_param_parse(param, "input-csp", "i400"); 482 | CHECK(r == 0); 483 | r = x265_param_parse(param, "fps", "1"); 484 | CHECK(r == 0); 485 | enc = x265_encoder_open(param); 486 | CHECK(enc); 487 | pic = x265_picture_alloc(); 488 | CHECK(pic); 489 | x265_picture_init(param, pic); 490 | x265_param_free(param); 491 | x265_nal *p_nal; 492 | uint32_t i_nal; 493 | r = 
x265_encoder_headers(enc, &p_nal, &i_nal); 494 | CHECK(r >= 0); 495 | write_nal(p_nal, i_nal); 496 | } 497 | void encode (uint8_t *frame, unsigned stride1 = 0, unsigned stride2 = 0) { 498 | if (stride2 == 0) stride2 = sizeof(uint8_t); 499 | if (stride1 == 0) stride1 = cols * stride2; 500 | if (stride2 == sizeof(uint8_t)) { 501 | pic->planes[0] = frame; 502 | pic->stride[0] = stride1; 503 | } 504 | else { 505 | framebuf.resize(rows * cols); 506 | auto from_y = frame; 507 | unsigned o = 0; 508 | for (int y = 0; y < rows; ++y) { 509 | auto from_x = from_y; 510 | from_y += stride1; 511 | for (int x = 0; x < cols; ++x) { 512 | framebuf[o++] = *from_x; 513 | from_x += stride2; 514 | } 515 | } 516 | pic->planes[0] = &framebuf[0]; 517 | pic->stride[0] = cols * sizeof(uint8_t); 518 | } 519 | pic->planes[1] = pic->planes[2] = NULL; 520 | x265_nal *p_nal; 521 | uint32_t i_nal; 522 | int r = x265_encoder_encode(enc, &p_nal, &i_nal, pic, NULL); 523 | CHECK(r >= 0); 524 | write_nal(p_nal, i_nal); 525 | ++pic->pts; 526 | } 527 | void flush () { 528 | for (;;) { 529 | x265_nal *p_nal; 530 | uint32_t i_nal; 531 | int r = x265_encoder_encode(enc, &p_nal, &i_nal, NULL, NULL); 532 | if (r <= 0) break; 533 | write_nal(p_nal, i_nal); 534 | } 535 | } 536 | ~H265Encoder () { 537 | x265_picture_free(pic); 538 | x265_encoder_close(enc); 539 | } 540 | }; 541 | 542 | class H265Decoder { 543 | mutable de265_decoder_context* ctx; 544 | mutable std::mutex mutex; 545 | public: 546 | H265Decoder (unsigned threads=1): ctx(de265_new_decoder()) { 547 | CHECK(ctx); 548 | de265_set_parameter_bool(ctx, DE265_DECODER_PARAM_BOOL_SEI_CHECK_HASH, true); 549 | de265_set_parameter_bool(ctx, DE265_DECODER_PARAM_SUPPRESS_FAULTY_PICTURES, false); 550 | de265_start_worker_threads(ctx, threads); 551 | } 552 | void decode (const_buffer buf, std::function callback) const { 553 | std::lock_guard lock(mutex); 554 | 555 | de265_error err = de265_push_data(ctx, 556 | boost::asio::buffer_cast(buf), 557 | 
boost::asio::buffer_size(buf), 558 | 0, (void *)2); 559 | CHECK(err == DE265_OK); 560 | err = de265_flush_data(ctx); 561 | CHECK(err == DE265_OK); 562 | int more = 1; 563 | while (more) { 564 | err = de265_decode(ctx, &more); 565 | if (err != DE265_OK) break; 566 | const de265_image* img = de265_get_next_picture(ctx); 567 | for (;;) { 568 | de265_error warning = de265_get_warning(ctx); 569 | if (warning==DE265_OK) { 570 | break; 571 | } 572 | fprintf(stderr,"WARNING: %s\n", de265_get_error_text(warning)); 573 | } 574 | if (!img) continue; 575 | callback(img); 576 | } 577 | } 578 | Tensor3 *decode_array (const_buffer buf, Json const &meta) const { 579 | int Z = meta["size"].array_items()[0].int_value(); 580 | int Y = meta["size"].array_items()[1].int_value(); 581 | int X = meta["size"].array_items()[2].int_value(); 582 | // allocate storage 583 | Tensor3 *array = new Tensor3(Z, Y, X); 584 | CHECK(array); 585 | 586 | uint8_t *to_z = (uint8_t *)array->data; 587 | int cnt_z = 0; 588 | decode(buf, [&to_z, &cnt_z, Y, X, array](const de265_image* img) { 589 | int cols = de265_get_image_width(img, 0); 590 | int rows = de265_get_image_height(img, 0); 591 | CHECK(rows == Y); 592 | CHECK(cols == X); 593 | CHECK(de265_get_chroma_format(img) == de265_chroma_mono); 594 | CHECK(de265_get_bits_per_pixel(img, 0) == 8); 595 | int stride; 596 | const uint8_t* frame = de265_get_image_plane(img, 0, &stride); 597 | // copy 598 | 599 | uint8_t *to_y = to_z; 600 | to_z += array->strides[0]; 601 | ++cnt_z; 602 | for (int i = 0; i < rows; ++i, to_y += array->strides[1], frame += stride) { 603 | memcpy(to_y, frame, cols * sizeof(uint8_t)); 604 | } 605 | }); 606 | CHECK(cnt_z == Z); 607 | return array; 608 | } 609 | Tensor3 *decode_array_sampled (const_buffer buf, Json const &meta, unsigned off, unsigned step) const { 610 | int Z = meta["size"].array_items()[0].int_value(); 611 | int Y = meta["size"].array_items()[1].int_value(); 612 | int X = meta["size"].array_items()[2].int_value(); 613 | 
int SZ = 0; 614 | for (int i = off; i < Z; i += step) { 615 | ++SZ; // count output size 616 | } 617 | if (SZ == 0) return 0; 618 | 619 | // allocate storage 620 | Tensor3 *array = new Tensor3(SZ, Y, X); 621 | CHECK(array); 622 | 623 | uint8_t *to_z = (uint8_t *)array->data; 624 | int z = 0; 625 | int next = off; 626 | decode(buf, [&to_z, &z, &next, &step, Y, X, array](const de265_image* img) { 627 | if (z < next) { 628 | z++; 629 | return; 630 | } 631 | next += step; 632 | z++; 633 | 634 | int cols = de265_get_image_width(img, 0); 635 | int rows = de265_get_image_height(img, 0); 636 | CHECK(rows == Y); 637 | CHECK(cols == X); 638 | CHECK(de265_get_chroma_format(img) == de265_chroma_mono); 639 | CHECK(de265_get_bits_per_pixel(img, 0) == 8); 640 | int stride; 641 | const uint8_t* frame = de265_get_image_plane(img, 0, &stride); 642 | // copy 643 | 644 | uint8_t *to_y = to_z; 645 | to_z += array->strides[0]; 646 | for (int i = 0; i < rows; ++i, to_y += array->strides[1], frame += stride) { 647 | memcpy(to_y, frame, cols * sizeof(uint8_t)); 648 | } 649 | }); 650 | CHECK(to_z == (uint8_t *)array->data + array->strides[0] * SZ); 651 | return array; 652 | } 653 | 654 | ~H265Decoder () { 655 | de265_free_decoder(ctx); 656 | } 657 | }; 658 | 659 | string encode (PyObject *_array) { 660 | ostringstream ss; 661 | PyArrayObject *array((PyArrayObject *)_array); 662 | CHECK(array->nd == 3) << "not 3d array: " << array->nd; 663 | CHECK(array->descr->type_num == NPY_UINT8) << "not uint8 array"; 664 | H265Encoder enc(ss, array->dimensions[1], 665 | array->dimensions[2], 666 | array->dimensions[0]); 667 | auto from_z = reinterpret_cast(array->data); 668 | for (unsigned z = 0; z < array->dimensions[0]; ++z) { 669 | enc.encode(from_z, array->strides[1], array->strides[2]); 670 | from_z += array->strides[0]; 671 | } 672 | enc.flush(); 673 | return ss.str(); 674 | } 675 | 676 | /* 677 | object mats2npy (vector const &images) { 678 | int rows = images[0].rows; 679 | int cols = 
images[0].cols; 680 | npy_intp images_dims[] = {images.size(), rows, cols}; 681 | PyObject *_array = PyArray_SimpleNew(3, &images_dims[0], NPY_UINT8); 682 | PyArrayObject *array = (PyArrayObject*)_array; 683 | CHECK(array->strides[2] == 1); 684 | auto from_z = reinterpret_cast(array->data); 685 | for (unsigned z = 0; z < images.size(); ++z) { 686 | CHECK(images[z].isContinuous()); 687 | auto from_y = from_z; 688 | from_z += array->strides[0]; 689 | for (unsigned y = 0; y < rows; ++y) { 690 | memcpy(from_y, images[z].ptr(y), cols * sizeof(uint8_t)); 691 | from_y += array->strides[1]; 692 | } 693 | } 694 | return object(boost::python::handle<>(_array)); 695 | } 696 | */ 697 | 698 | object decode (string const &v, int Z, int Y, int X) { 699 | Json meta = Json::object{ 700 | {"size", Json::array{Z, Y, X}} 701 | }; 702 | H265Decoder dec; 703 | return dec.decode_array(const_buffer(&v[0], v.size()), meta)->to_npy_and_delete(); 704 | } 705 | 706 | namespace picpac { 707 | 708 | #define PICPAC_VOLUME_CONFIG_UPDATE_ALL(C) \ 709 | PICPAC_CONFIG_UPDATE(C,seed);\ 710 | PICPAC_CONFIG_UPDATE(C,loop);\ 711 | PICPAC_CONFIG_UPDATE(C,shuffle);\ 712 | PICPAC_CONFIG_UPDATE(C,reshuffle);\ 713 | PICPAC_CONFIG_UPDATE(C,stratify);\ 714 | PICPAC_CONFIG_UPDATE(C,split);\ 715 | PICPAC_CONFIG_UPDATE(C,split_fold);\ 716 | PICPAC_CONFIG_UPDATE(C,split_negate);\ 717 | PICPAC_CONFIG_UPDATE(C,mixin);\ 718 | PICPAC_CONFIG_UPDATE(C,mixin_group_delta);\ 719 | PICPAC_CONFIG_UPDATE(C,mixin_max);\ 720 | PICPAC_CONFIG_UPDATE(C,cache);\ 721 | PICPAC_CONFIG_UPDATE(C,preload);\ 722 | PICPAC_CONFIG_UPDATE(C,threads);\ 723 | PICPAC_CONFIG_UPDATE(C,channels);\ 724 | PICPAC_CONFIG_UPDATE(C,min_size);\ 725 | PICPAC_CONFIG_UPDATE(C,max_size);\ 726 | PICPAC_CONFIG_UPDATE(C,resize_width);\ 727 | PICPAC_CONFIG_UPDATE(C,resize_height);\ 728 | PICPAC_CONFIG_UPDATE(C,crop_width);\ 729 | PICPAC_CONFIG_UPDATE(C,crop_height);\ 730 | PICPAC_CONFIG_UPDATE(C,round_div);\ 731 | PICPAC_CONFIG_UPDATE(C,round_mod);\ 732 | 
PICPAC_CONFIG_UPDATE(C,decode_mode);\ 733 | PICPAC_CONFIG_UPDATE(C,annotate);\ 734 | PICPAC_CONFIG_UPDATE(C,anno_type);\ 735 | PICPAC_CONFIG_UPDATE(C,anno_copy);\ 736 | PICPAC_CONFIG_UPDATE(C,anno_palette);\ 737 | PICPAC_CONFIG_UPDATE(C,anno_color1); \ 738 | PICPAC_CONFIG_UPDATE(C,anno_color2); \ 739 | PICPAC_CONFIG_UPDATE(C,anno_color3); \ 740 | PICPAC_CONFIG_UPDATE(C,anno_thickness);\ 741 | PICPAC_CONFIG_UPDATE(C,anno_min_ratio); \ 742 | PICPAC_CONFIG_UPDATE(C,perturb);\ 743 | PICPAC_CONFIG_UPDATE(C,pert_colorspace); \ 744 | PICPAC_CONFIG_UPDATE(C,pert_color1); \ 745 | PICPAC_CONFIG_UPDATE(C,pert_color2); \ 746 | PICPAC_CONFIG_UPDATE(C,pert_color3); \ 747 | PICPAC_CONFIG_UPDATE(C,pert_angle); \ 748 | PICPAC_CONFIG_UPDATE(C,pert_min_scale); \ 749 | PICPAC_CONFIG_UPDATE(C,pert_max_scale); \ 750 | PICPAC_CONFIG_UPDATE(C,pert_hflip); \ 751 | PICPAC_CONFIG_UPDATE(C,pert_vflip); 752 | 753 | std::mutex global_lock; 754 | 755 | struct Cube { 756 | Tensor3 *images; 757 | Tensor3 *labels; 758 | }; 759 | 760 | vector global_cube_pool; 761 | 762 | class CubeLoader: public ImageLoader { 763 | public: 764 | // we use pert_color2 & pert_color3 to sample rotate axis 765 | // 766 | struct Config: public ImageLoader::Config { 767 | unsigned samples0; 768 | unsigned samples1; 769 | unsigned pool; 770 | unsigned factor; 771 | unsigned decode_threads; 772 | Config (): samples0(4), samples1(4), pool(128), factor(1), decode_threads(1) { 773 | } 774 | } config; 775 | 776 | H265Decoder dec; 777 | 778 | typedef Cube Value; 779 | 780 | CubeLoader (Config const &config_): ImageLoader(config_), config(config_), dec(config_.decode_threads) { 781 | } 782 | 783 | struct Nodule { 784 | glm::vec3 pos; 785 | float radius; 786 | }; 787 | 788 | struct Sample { 789 | glm::vec3 pos; 790 | glm::vec3 angle; 791 | float scale; 792 | Sample () {} 793 | Sample (float x, float y, float z, 794 | float phi, float theta, float kappa, 795 | float s): pos(x, y, z), angle(phi, theta, kappa), scale(s) { 796 | } 
797 | }; 798 | 799 | static float l2norm (glm::vec3 const &p) { 800 | return std::sqrt(p[0]*p[0] + p[1]*p[1] + p[2]*p[2]); 801 | } 802 | 803 | Tensor3 *generate_labels (glm::vec3 center, glm::vec3 rotate, float scale0, vector const &from_nodules) const { //std::default_random_engine &rng) { 804 | 805 | CHECK_POWER_OF_TWO(config.factor); 806 | // 1. convert nodules to cube location 807 | glm::mat4 unrotate = glm::inverse( 808 | glm::rotate(float(rotate[2] * 180/M_PI), // glm requires digrees 809 | glm::vec3( 810 | sin(rotate[0]) * cos(rotate[1]), 811 | sin(rotate[0]) * sin(rotate[1]), 812 | cos(rotate[0])))); 813 | 814 | int cs = Sampler::CUBE_SIZE/config.factor; 815 | float cs2 = cs/2.0; 816 | vector nodules; 817 | float scale = scale0 / config.factor; 818 | //scale0 *= config.factor; 819 | glm::vec3 cc(cs2, cs2, cs2); 820 | static constexpr float SQRT3 = 1.7320508075688772; 821 | float box_radius = cs2 * SQRT3; 822 | 823 | for (auto const &nod: from_nodules) { 824 | Nodule nnod; 825 | nnod.pos = glm::vec3(unrotate*glm::vec4(nod.pos - center, 1)) * scale + cs2; 826 | nnod.radius = nod.radius * scale; 827 | 828 | float dist = l2norm(cc - nnod.pos); 829 | if (dist < nnod.radius + box_radius) { 830 | nodules.push_back(nnod); 831 | } 832 | } 833 | 834 | if (nodules.empty()) return 0; 835 | Tensor3 *array = new Tensor3(cs, cs, cs, true); 836 | CHECK(array); 837 | for (auto const &nod: nodules) { 838 | float x = nod.pos[0]; 839 | float y = nod.pos[1]; 840 | float z = nod.pos[2]; 841 | float r = nod.radius; 842 | int lb = int(floor(z-r)); 843 | int ub = int(ceil(z+r)); 844 | for (int i = lb; i <= ub; ++i) { 845 | if (i < 0) continue; 846 | if (i >= cs) continue; 847 | float dz = i - z; 848 | int r0 = round(sqrt(r * r - dz * dz)); 849 | if (r0 < 2) continue; 850 | cv::Mat image(cs, cs, CV_8U, array->data + i * array->strides[0]); 851 | cv::circle(image, cv::Point(int(round(x)), int(round(y))), r0, cv::Scalar(1), -1); 852 | } 853 | } 854 | return array; 855 | } 856 | 857 | 
858 | void load (RecordReader rr, PerturbVector const &p, Value *out, 859 | CacheValue *c, std::mutex *m) const { 860 | lock_guard lock(global_lock); 861 | if (global_cube_pool.size() < config.pool) { 862 | Record r; 863 | rr(&r); 864 | CHECK(r.size() > 1); 865 | string err; 866 | Json json = Json::parse(r.field_string(1), err); 867 | 868 | Tensor3 *array = dec.decode_array(r.field(0), json); 869 | CHECK(array); 870 | 871 | glm::ivec3 off, len, shift; 872 | get_sampler(); 873 | globalSampler->texture(array, &off, &len, &shift); 874 | 875 | // nodules 876 | vector nodules; 877 | Json const &json_nodules = json["shapes"]; 878 | if (json_nodules.is_array()) { 879 | for (auto const &nod: json_nodules.array_items()) { 880 | CHECK(nod["type"].string_value() == "ball"); 881 | nodules.emplace_back(); 882 | nodules.back().pos = glm::vec3( 883 | nod["x"].number_value()-shift[0], 884 | nod["y"].number_value()-shift[1], 885 | nod["z"].number_value()-shift[2]); 886 | nodules.back().radius = nod["r"].number_value(); 887 | } 888 | } 889 | // copy to ... 
890 | // sample 891 | std::default_random_engine rng(p.shiftx); 892 | std::uniform_real_distribution delta_color(-config.pert_color1, config.pert_color1); 893 | std::uniform_real_distribution phi(-M_PI/2, M_PI/2); 894 | std::uniform_real_distribution theta(-M_PI, M_PI); 895 | std::uniform_real_distribution kappa(-M_PI*config.pert_angle/180, M_PI * config.pert_angle/180); 896 | std::uniform_real_distribution linear_scale(config.pert_min_scale, config.pert_max_scale); 897 | 898 | vector samples; 899 | 900 | int constexpr cs = Sampler::CUBE_SIZE; 901 | int constexpr cs2 = Sampler::CUBE_SIZE/2; 902 | if (config.perturb) { 903 | for (unsigned i = 0; i < config.samples0; ++i) { 904 | samples.emplace_back( 905 | off[0] + rng() % (len[0] - cs) + cs2, 906 | off[1] + rng() % (len[1] - cs) + cs2, 907 | off[2] + rng() % (len[2] - cs) + cs2, 908 | phi(rng), theta(rng), kappa(rng), linear_scale(rng) 909 | ); 910 | } 911 | vector idx; // nodule indexes to use 912 | for (unsigned i = 0; i < nodules.size(); ++i) { 913 | idx.push_back(i); 914 | } 915 | std::shuffle(idx.begin(), idx.end(), rng); 916 | if (idx.size() > config.samples1) { 917 | idx.resize(config.samples1); 918 | } 919 | else if (idx.size() > 0) { 920 | // over sample 921 | unsigned i = 0; 922 | while (idx.size() < config.samples1) { 923 | idx.push_back(idx[i++]); 924 | } 925 | } 926 | for (int i: idx) { 927 | // sample from nodules 928 | auto const &nodule = nodules[i]; 929 | std::uniform_real_distribution shift2(-nodule.radius, nodule.radius); 930 | samples.emplace_back( 931 | nodule.pos[0] + shift2(rng), 932 | nodule.pos[1] + shift2(rng), 933 | nodule.pos[2] + shift2(rng), 934 | phi(rng), theta(rng), kappa(rng), linear_scale(rng) 935 | ); 936 | } 937 | std::shuffle(samples.begin(), samples.end(), rng); 938 | } 939 | else { 940 | if (config.samples0 > 0) { 941 | // full image 942 | int constexpr vs2 = Sampler::VOLUME_SIZE/2; 943 | samples.emplace_back( 944 | vs2, vs2, vs2, 945 | 0, 0, 0, Sampler::VOLUME_SIZE / 
Sampler::CUBE_SIZE); 946 | } 947 | if (config.samples1 > 0) { 948 | // all nodules 949 | for (auto const &nodule: nodules) { 950 | // sample from nodules 951 | samples.emplace_back( 952 | nodule.pos[0], 953 | nodule.pos[1], 954 | nodule.pos[2], 955 | 0, 0, 0, 1.0); 956 | } 957 | } 958 | } 959 | CHECK(samples.size()); 960 | for (auto const &s: samples) { 961 | Cube cube; 962 | cube.images = globalSampler->sample(s.pos, s.angle, s.scale); 963 | if (config.perturb) { 964 | cv::Mat mat(Sampler::VIEW_SIZE, Sampler::VIEW_SIZE, CV_8U, cube.images->data); 965 | mat += delta_color(rng); 966 | } 967 | cube.labels = generate_labels(s.pos, s.angle, s.scale, nodules); 968 | global_cube_pool.push_back(cube); 969 | } 970 | delete array; 971 | std::shuffle(global_cube_pool.begin(), global_cube_pool.end(), rng); 972 | } 973 | CHECK(global_cube_pool.size()); 974 | *out = global_cube_pool.back(); 975 | global_cube_pool.pop_back(); 976 | } 977 | }; 978 | 979 | class CubeStream: public PrefetchStream { 980 | public: 981 | CubeStream (std::string const &path, Config const &config) : PrefetchStream(fs::path(path), config) { 982 | CHECK(global_cube_pool.empty()); 983 | } 984 | 985 | tuple next () { 986 | Cube cube = PrefetchStream::next(); 987 | if (cube.labels) { 988 | return make_tuple(cube.images->to_npy_and_delete(), 989 | cube.labels->to_npy_and_delete()); 990 | } 991 | else { 992 | return make_tuple(cube.images->to_npy_and_delete(), object()); 993 | } 994 | } 995 | }; 996 | 997 | object create_cube_stream (tuple args, dict kwargs) { 998 | object self = args[0]; 999 | CHECK(len(args) > 1); 1000 | string path = extract(args[1]); 1001 | CubeStream::Config config; 1002 | #define PICPAC_CONFIG_UPDATE(C, P) \ 1003 | C.P = extract(kwargs.get(#P, C.P)) 1004 | PICPAC_VOLUME_CONFIG_UPDATE_ALL(config); 1005 | PICPAC_CONFIG_UPDATE(config,samples0); 1006 | PICPAC_CONFIG_UPDATE(config,samples1); 1007 | PICPAC_CONFIG_UPDATE(config,pool); 1008 | PICPAC_CONFIG_UPDATE(config,factor); 1009 | 
PICPAC_CONFIG_UPDATE(config,decode_threads); 1010 | #undef PICPAC_CONFIG_UPDATE 1011 | CHECK(!config.cache) << "Cube stream should net be cached"; 1012 | if (!config.perturb) { 1013 | LOG(WARNING) << "perturb == FALSE: for testing only, don't use for training."; 1014 | LOG(WARNING) << "NODULES are in original resolution"; 1015 | LOG(WARNING) << "FULL IMAGES are of 1/8 resolution (512 to 64)"; 1016 | } 1017 | CHECK(config.channels == 1) << "Cube stream only supports 1 channels."; 1018 | CHECK(config.threads == 1); 1019 | CHECK(config.pool > 0); 1020 | LOG(WARNING) << "preload: " << config.preload; 1021 | return self.attr("__init__")(path, config); 1022 | }; 1023 | 1024 | class VolumeLoader: public ImageLoader { 1025 | static void dummy_record_reader (Record *) { 1026 | CHECK(false) << "Dummy record reader should never be invoked."; 1027 | } 1028 | public: 1029 | struct Config: public ImageLoader::Config { 1030 | int stride; 1031 | int decode_threads; 1032 | Config (): stride(3), decode_threads(1) { 1033 | } 1034 | } config; 1035 | H265Decoder dec; 1036 | typedef Tensor3* Value; 1037 | VolumeLoader (Config const &config_): ImageLoader(config_), config(config_), dec(config_.decode_threads) { 1038 | } 1039 | void load (RecordReader rr, PerturbVector const &p, Value *out, 1040 | CacheValue *c = nullptr, std::mutex *m = nullptr) const { 1041 | Record r; 1042 | rr(&r); 1043 | CHECK(r.size() >= 1); 1044 | string err; 1045 | Json json = Json::parse(r.field_string(1), err); 1046 | 1047 | unsigned off = 0; 1048 | if (config.perturb) off = p.shiftx % config.stride; 1049 | 1050 | std::mutex dummy_mutex; 1051 | CHECK(config.stride > 0); 1052 | Tensor3 *array = dec.decode_array_sampled(r.field(0), json, off, config.stride); 1053 | CHECK(array); 1054 | if (config.perturb) { 1055 | ImageLoader::CacheValue cache; 1056 | ImageLoader::Value loaded; 1057 | cache.label = 0; 1058 | cache.annotation = cv::Mat(); 1059 | uint8_t *z = (uint8_t *)array->data; 1060 | cache.image = 
cv::Mat(array->dimensions[1], array->dimensions[2], CV_8U, (void *)z); 1061 | ImageLoader::load(dummy_record_reader,p, &loaded, &cache, &dummy_mutex); 1062 | CHECK(loaded.image.rows > 0); 1063 | CHECK(loaded.image.type() == CV_8U); 1064 | 1065 | // allocate storage 1066 | Tensor3 *oarray = new Tensor3(array->dimensions[0], loaded.image.rows, loaded.image.cols); 1067 | CHECK(oarray); 1068 | uint8_t *to_z = (uint8_t *)oarray->data; 1069 | int total = loaded.image.rows * loaded.image.cols; 1070 | uint8_t const *xxx = loaded.image.ptr(0); 1071 | std::copy(xxx, xxx + total, to_z); 1072 | for (int i = 1; i < array->dimensions[0]; ++i) { 1073 | z += array->strides[0]; 1074 | to_z += total; 1075 | cache.image = cv::Mat(array->dimensions[1], array->dimensions[2], CV_8U, (void *)z); 1076 | ImageLoader::load(dummy_record_reader,p, &loaded, &cache, &dummy_mutex); 1077 | CHECK(loaded.image.rows == oarray->dimensions[1]); 1078 | CHECK(loaded.image.cols == oarray->dimensions[2]); 1079 | CHECK(loaded.image.type() == CV_8U); 1080 | xxx = loaded.image.ptr(0); 1081 | std::copy(xxx, xxx + total, to_z); 1082 | } 1083 | delete array; 1084 | array = oarray; 1085 | } 1086 | array->label = r.meta().label; 1087 | *out = array; 1088 | } 1089 | }; 1090 | 1091 | class VolumeStream: public PrefetchStream { 1092 | public: 1093 | VolumeStream (std::string const &path, Config const &config) 1094 | : PrefetchStream(fs::path(path), config) { 1095 | } 1096 | tuple next () { 1097 | auto v = PrefetchStream::next(); 1098 | float l = v->label; 1099 | return make_tuple(l, v->to_npy_and_delete()); 1100 | } 1101 | }; 1102 | 1103 | object create_volume_stream (tuple args, dict kwargs) { 1104 | object self = args[0]; 1105 | CHECK(len(args) > 1); 1106 | string path = extract(args[1]); 1107 | VolumeStream::Config config; 1108 | #define PICPAC_CONFIG_UPDATE(C, P) \ 1109 | C.P = extract(kwargs.get(#P, C.P)) 1110 | PICPAC_VOLUME_CONFIG_UPDATE_ALL(config); 1111 | PICPAC_CONFIG_UPDATE(config, stride); 1112 | 
PICPAC_CONFIG_UPDATE(config,decode_threads); 1113 | #undef PICPAC_CONFIG_UPDATE 1114 | CHECK(!config.cache); 1115 | CHECK(config.channels == 1); 1116 | CHECK(config.threads == 1); 1117 | return self.attr("__init__")(path, config); 1118 | }; 1119 | 1120 | static object return_iterator (tuple args, dict kwargs) { 1121 | object self = args[0]; 1122 | self.attr("reset")(); 1123 | return self; 1124 | }; 1125 | 1126 | // copied from picpac 1127 | class Writer: public FileWriter { 1128 | public: 1129 | Writer (string const &path): FileWriter(fs::path(path), FileWriter::COMPACT) { 1130 | } 1131 | void append (float label, string const &buf) { 1132 | Record record(label, buf); 1133 | FileWriter::append(record); 1134 | } 1135 | 1136 | void append (string const &buf1, string const &buf2) { 1137 | Record record(0, buf1, buf2); 1138 | FileWriter::append(record); 1139 | } 1140 | 1141 | void append (float label, string const &buf1, string const &buf2) { 1142 | Record record(label, buf1, buf2); 1143 | FileWriter::append(record); 1144 | } 1145 | }; 1146 | 1147 | void (Writer::*append1) (float, string const &) = &Writer::append; 1148 | void (Writer::*append2) (string const &, string const &) = &Writer::append; 1149 | void (Writer::*append3) (float, string const &, string const &) = &Writer::append; 1150 | 1151 | void translate_eos (EoS const &) 1152 | { 1153 | // Use the Python 'C' API to set up an exception object 1154 | PyErr_SetNone(PyExc_StopIteration); 1155 | } 1156 | } 1157 | 1158 | using namespace picpac; 1159 | 1160 | struct CubicLibGuard { 1161 | CubicLibGuard() { 1162 | LOG(INFO) << "Initializing PicPac3D library"; 1163 | import_array(); 1164 | //CHECK(globalSampler); 1165 | } 1166 | ~CubicLibGuard() { 1167 | LOG(INFO) << "Cleaning PicPac3D library"; 1168 | globalSampler = 0; 1169 | //delete globalSampler; 1170 | for (auto &cube: global_cube_pool) { 1171 | if (cube.images) delete cube.images; 1172 | if (cube.labels) delete cube.labels; 1173 | } 1174 | 
global_cube_pool.clear(); 1175 | x265_cleanup(); 1176 | } 1177 | }; 1178 | 1179 | BOOST_PYTHON_MODULE(picpac3d) 1180 | { 1181 | 1182 | class_, boost::noncopyable>("CubicLibGuard", no_init); 1183 | boost::shared_ptr picpac3dLibGuard(new CubicLibGuard()); 1184 | scope().attr("__libguard") = picpac3dLibGuard; 1185 | 1186 | class_, boost::noncopyable>("Sampler", no_init) 1187 | .def("texture_direct", &Sampler::texture_direct) 1188 | .def("texture_indirect", &Sampler::texture_indirect) 1189 | .def("sample", &Sampler::sample_simple) 1190 | ; 1191 | 1192 | numeric::array::set_module_and_type("numpy", "ndarray"); 1193 | register_exception_translator(&translate_eos); 1194 | def("encode", ::encode); 1195 | def("decode", ::decode); 1196 | def("sampler", ::get_sampler); 1197 | //def("render", ::render); 1198 | class_("CubeStreamParams", init<>()); 1199 | class_("VolumeStreamParams", init<>()); 1200 | class_("CubeStream", no_init) 1201 | .def("__init__", raw_function(create_cube_stream), "exposed ctor") 1202 | .def("__iter__", raw_function(return_iterator)) 1203 | .def(init()) // C++ constructor not exposed 1204 | .def("next", &CubeStream::next) 1205 | .def("size", &CubeStream::size) 1206 | .def("reset", &CubeStream::reset) 1207 | ; 1208 | class_("VolumeStream", no_init) 1209 | .def("__init__", raw_function(create_volume_stream), "exposed ctor") 1210 | .def("__iter__", raw_function(return_iterator)) 1211 | .def(init()) // C++ constructor not exposed 1212 | .def("next", &VolumeStream::next) 1213 | .def("size", &VolumeStream::size) 1214 | .def("reset", &VolumeStream::reset) 1215 | ; 1216 | class_("Writer", init()) 1217 | .def("append", append1) 1218 | .def("append", append2) 1219 | .def("append", append3) 1220 | ; 1221 | } 1222 | 1223 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup, Extension 2 | 3 | picpac3d = 
Extension('picpac3d',
        language = 'c++',
        extra_compile_args = ['-O3', '-std=c++1y'],
        libraries = ['opencv_highgui', 'opencv_core', 'boost_filesystem', 'boost_system', 'boost_python', 'glog', 'x265', 'de265', 'glfw', 'GLEW', 'GL'],
        include_dirs = ['/usr/local/include', 'picpac', 'picpac/json11'],
        library_dirs = ['/usr/local/lib'],
        sources = ['picpac3d.cpp', 'picpac/picpac.cpp', 'picpac/picpac-cv.cpp', 'picpac/json11/json11.cpp'],
        # rebuild when the bundled picpac headers change
        depends = ['picpac/json11/json11.hpp', 'picpac/picpac.h', 'picpac/picpac-cv.h']
        )

setup(name = 'cubic',
      version = '0.0.1',
      author = 'Wei Dong',
      author_email = 'wdong@wdong.org',
      license = 'LGPL',
      description = 'This is a demo package',
      ext_modules = [picpac3d],
      )