├── .gitattributes
├── gpu-scanline
    └── src
    │   ├── rvg_loader.cpp
    │   ├── mochimazui
    │       ├── stdext.cpp
    │       ├── stdext.h
    │       ├── vector_type.h
    │       ├── color.h
    │       ├── camera_3d.cpp
    │       ├── file.cpp
    │       ├── file.h
    │       ├── option.h
    │       ├── bitmap.h
    │       ├── camera_2d.h
    │       ├── camera_controller_2d.h
    │       ├── camera_controller_3d.h
    │       ├── stdio_ext.h
    │       ├── bitmap.cpp
    │       ├── camera_3d.h
    │       ├── config.cpp
    │       ├── camera_controller_3d.cpp
    │       └── cuda_array.h
    │   ├── text_util.h
    │   ├── text_util.cpp
    │   ├── rvg_loader.h
    │   ├── svg_load.cpp
    │   ├── rasterizer
    │       ├── shared
    │       │   ├── ras_cut.cu
    │       │   ├── ras_base.cu
    │       │   ├── ras_factory.h
    │       │   ├── ras_pipeline_mode.h
    │       │   ├── ras_scan.h
    │       │   ├── ras_define.h
    │       │   └── ras_qm_mask.cu
    │       ├── kernel
    │       │   └── animation.h
    │       └── R_cut_A_mask_comb_scanline
    │       │   └── ras_cut_mask_comb_scanline.h
    │   ├── cuda
    │       ├── cuda_sort.h
    │       ├── cuda_cached_allocator.cpp
    │       ├── cuda_sort.cu
    │       └── cuda_cached_allocator.h
    │   ├── svg_loader.cpp
    │   ├── svg_loader.h
    │   ├── bezier_curve_type.h
    │   ├── tiger
    │       └── tiger.h
    │   ├── thrust_impl.h
    │   ├── rvg.h
    │   ├── bounding_box.h
    │   ├── svg.h
    │   ├── modern_gpu
    │       └── include
    │       │   ├── kernels
    │       │       ├── cubradixsort.cuh
    │       │       ├── loadbalance.cuh
    │       │       ├── localitysort.cuh
    │       │       ├── reduce.cuh
    │       │       └── bulkremove.cuh
    │       │   ├── moderngpu.cuh
    │       │   ├── mgpuenums.h
    │       │   ├── util
    │       │       ├── mgpualloc.h
    │       │       ├── util.h
    │       │       ├── format.h
    │       │       └── static.h
    │       │   ├── kernels_ext
    │       │       └── search_ext.cuh
    │       │   ├── device
    │       │       ├── launchbox.cuh
    │       │       ├── deviceutil.cuh
    │       │       ├── ctasegscan.cuh
    │       │       └── ctaloadbalance.cuh
    │       │   ├── sparsematrix.h
    │       │   └── mmio.h
    │   ├── gradient.h
    │   ├── thrust_impl_scan.cu
    │   ├── timer.h
    │   ├── rapidxml_utils.hpp
    │   ├── vg_config.cpp
    │   ├── vg_config.h
    │   └── rapidxml_iterators.hpp
├── working_directory
    ├── shader
    │   ├── shared
    │   │   ├── curve.frag.glsl
    │   │   ├── output_scale.frag.glsl
    │   │   ├── fps.vert.glsl
    │   │   ├── curve.vert.glsl
    │   │   ├── integrate_samples.vert.glsl
    │   │   ├── output_scale.vert.glsl
    │   │   ├── integrate_samples.frag.glsl
    │   │   └── fps.frag.glsl
    │   └── R_cut_A_stencil
    │   │   ├── output_8.frag.glsl
    │   │   ├── ms_output_32.frag.glsl
    │   │   ├── ms_output_8.frag.glsl
    │   │   ├── output_32.frag.glsl
    │   │   ├── ms_output_32.frag.glsl.before_368.22
    │   │   ├── ms_output_8.vert.glsl
    │   │   ├── output_8.vert.glsl
    │   │   ├── output_32.vert.glsl
    │   │   └── ms_output_32.vert.glsl
    ├── .gitignore
    ├── ui
    │   ├── minimal_ui.json
    │   └── ui.json
    └── vg_default.cfg
├── gpu-scanline-path-rendering-core.sln
├── LICENSE
├── README.md
└── .gitignore


/.gitattributes:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/rvg_loader.cpp:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/mochimazui/stdext.cpp:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/mochimazui/stdext.h:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/text_util.h:
--------------------------------------------------------------------------------
1 | 
2 | #pragma once
3 | 
4 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/text_util.cpp:
--------------------------------------------------------------------------------
1 | 
2 | #include "text_util.h"
3 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/mochimazui/vector_type.h:
--------------------------------------------------------------------------------
1 | 
2 | #pragma once
3 | 
4 | namespace Mochimazui {
5 | }
6 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/rvg_loader.h:
--------------------------------------------------------------------------------
1 | 
2 | #pragma once
3 | 
4 | namespace Mochimazui {
5 | 
6 | void load_rvg();
7 | 
8 | }
9 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/svg_load.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mochimazui/gpu-scanline-path-rendering/HEAD/gpu-scanline/src/svg_load.cpp


--------------------------------------------------------------------------------
/gpu-scanline/src/mochimazui/color.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mochimazui/gpu-scanline-path-rendering/HEAD/gpu-scanline/src/mochimazui/color.h


--------------------------------------------------------------------------------
/gpu-scanline/src/mochimazui/camera_3d.cpp:
--------------------------------------------------------------------------------
1 | #include "camera_3d.h"
2 | 
3 | namespace Mochimazui {
4 | 
5 | 	glm::mat4x4 Camera3D::matrix() {
6 | 		return _matrix;
7 | 	}
8 | 
9 | }


--------------------------------------------------------------------------------
/gpu-scanline/src/rasterizer/shared/ras_cut.cu:
--------------------------------------------------------------------------------
1 | 
2 | namespace Mochimazui {
3 | 
4 | namespace Rasterizer {
5 | 
6 | } // end of namespace Rasterizer
7 | 
8 | } // end of namespace Mochimazui
9 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/cuda/cuda_sort.h:
--------------------------------------------------------------------------------
1 | 
2 | #pragma once
3 | 
4 | #include <cstdint>
5 | 
6 | namespace Mochimazui {
7 | 	void cuda_seg_sort_int_by_int(int* key,int* data,int n,int* segs,int nsegs);	
8 | }
9 | 


--------------------------------------------------------------------------------
/working_directory/shader/shared/curve.frag.glsl:
--------------------------------------------------------------------------------
 1 | 
 2 | #version 450
 3 | 
 4 | flat in vec4 fragColor;
 5 | 
 6 | layout(location = 0) out vec4 color;
 7 | 
 8 | void main() {
 9 | 	color = fragColor;
10 | }
11 | 


--------------------------------------------------------------------------------
/working_directory/.gitignore:
--------------------------------------------------------------------------------
 1 | 
 2 | font/
 3 | output/
 4 | test/
 5 | result/
 6 | upload/
 7 | debug_dump/
 8 | 
 9 | *.cmd
10 | *.txt
11 | *.bmp
12 | *.png
13 | *.pdf
14 | *.lnk
15 | *.exe
16 | *.ttf
17 | 
18 | 
19 | 
20 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/cuda/cuda_cached_allocator.cpp:
--------------------------------------------------------------------------------
1 | 
2 | #include "cuda_cached_allocator.h"
3 | 
4 | namespace Mochimazui {
5 | 
6 | cuda_cached_allocator g_thrustCachedAllocator;
7 | cuda_cached_allocator &g_alloc = g_thrustCachedAllocator;
8 | 
9 | }


--------------------------------------------------------------------------------
/gpu-scanline/src/mochimazui/file.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #define _CRT_SECURE_NO_WARNINGS
 3 | 
 4 | #include <mochimazui/file.h>
 5 | 
 6 | #include <cstdio>
 7 | #include <cassert>
 8 | 
 9 | #include <mochimazui/stdio_ext.h>
10 | 
11 | namespace Mochimazui {
12 | }


--------------------------------------------------------------------------------
/gpu-scanline/src/svg_loader.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "svg_loader.h"
 3 | 
 4 | #include "vg_container.h"
 5 | 
 6 | namespace Mochimazui {
 7 | 
 8 | std::shared_ptr<VGContainer>
 9 | load_svg(const std::string &file_name, bool stroke_to_fill) {
10 | 	return nullptr;
11 | }
12 | 
13 | }
14 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/svg_loader.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #pragma once
 3 | 
 4 | #include <string>
 5 | #include <memory>
 6 | 
 7 | namespace Mochimazui {
 8 | 
 9 | class VGContainer;
10 | 
11 | std::shared_ptr<VGContainer> 
12 | load_svg(const std::string &file_name, bool stroke_to_fill);
13 | 
14 | }
15 | 


--------------------------------------------------------------------------------
/working_directory/ui/minimal_ui.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "window": {
 3 |     "id": "top", "type": "window",
 4 |     "layout": "horizontal",
 5 |     "width": 1200, "height": 1024,
 6 |     "title": "GPU Scanline VG",
 7 |     "subwindows": [
 8 |       { "id": "display","type": "subwindow" }
 9 |     ]
10 |   },
11 | 
12 |   "subwindows": {
13 |   }
14 | }
15 | 
16 | 


--------------------------------------------------------------------------------
/working_directory/shader/shared/output_scale.frag.glsl:
--------------------------------------------------------------------------------
 1 | 
 2 | #version 450 
 3 | 
 4 | layout(binding = 0) uniform sampler2D scale_tex;
 5 | 
 6 | in vec2 texcoord;
 7 | 
 8 | layout(location = 0) out vec4 color; 
 9 | 
// Samples scale_tex at texcoord and writes the texel to output location 0.
// Texture coordinates with a negative component are painted opaque black
// instead of being sampled.
void main() {
	if (texcoord.x < 0 || texcoord.y < 0) {
		color = vec4(0, 0, 0, 1);
	} else {
		// texture() is the core-profile replacement for texture2D().
		color = texture(scale_tex, texcoord);
	}
}
16 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/cuda/cuda_sort.cu:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "cuda_sort.h"
 3 | 
 4 | #include <cstdint>
 5 | 
 6 | #include <iostream>
 7 | 
 8 | #include "../modern_gpu/include/kernels_ext/segmentedsort_ext.cuh"
 9 | 
10 | namespace Mochimazui {
11 | 
12 | 	void cuda_seg_sort_int_by_int(int* key,int* data,int n,int* segs,int nsegs){
13 | 		mgpu_ext::SegSortPairsFromIndices(key, data, n, segs,nsegs);
14 | 	}
15 | 
16 | }
17 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/bezier_curve_type.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #pragma once
 3 | 
 4 | namespace Mochimazui {
 5 | 
 6 | enum BezierCurveType {
 7 | 	BCT_Linear = 0x02,
 8 | 	BCT_Quadratic = 0x03,
 9 | 	BCT_Cubic = 0x04,
10 | 	BCT_Rational = 0x13,
11 | };
12 | 
13 | enum VGCurveType {
14 | 	CT_Linear = BCT_Linear,
15 | 	CT_Quadratic = BCT_Quadratic,
16 | 	CT_Cubic = BCT_Cubic,
17 | 	CT_Rational = BCT_Rational,
18 | };
19 | 
20 | } // end of namespace Mochimazui
21 | 


--------------------------------------------------------------------------------
/working_directory/shader/shared/fps.vert.glsl:
--------------------------------------------------------------------------------
 1 | 
 2 | #version 450
 3 | 
 4 | #define SIZE 10
 5 | 
 6 | uniform ivec2 vp_size;
 7 | 
 8 | vec2 vs[4] = {
 9 | 	vec2(0, vp_size.y - 60),
10 | 	vec2(300, vp_size.y - 60),
11 | 	vec2(300, vp_size.y),
12 | 	vec2(0, vp_size.y)
13 | };
14 | 
// Looks up this vertex's corner in vs[] and maps it from pixel coordinates to
// normalized device coordinates using the viewport size.
void main() {
	vec2 corner = vs[gl_VertexID];

	float ndc_x = corner.x / vp_size.x * 2 - 1;
	float ndc_y = corner.y / vp_size.y * 2 - 1;

	gl_Position = vec4(ndc_x, ndc_y, 0, 1);
}
22 | 


--------------------------------------------------------------------------------
/working_directory/shader/shared/curve.vert.glsl:
--------------------------------------------------------------------------------
 1 | 
 2 | #version 450
 3 | 
 4 | uniform ivec2 vp_size;
 5 | 
 6 | layout(binding = 0) uniform samplerBuffer tb_vertex;
 7 | layout(binding = 1) uniform samplerBuffer tb_color;
 8 | 
 9 | flat out vec4 fragColor;
10 | 
// Fetches this vertex's position and color from the two buffer textures
// (both indexed by gl_VertexID), converts the position's x/y from pixel
// coordinates to normalized device coordinates, and passes the color on.
void main() {
	vec2 pixel_pos = texelFetch(tb_vertex, gl_VertexID).xy;
	vec2 ndc = pixel_pos / vec2(vp_size) * 2 - vec2(1.0, 1.0);

	fragColor = texelFetch(tb_color, gl_VertexID);
	gl_Position = vec4(ndc, 0, 1);
}
17 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/rasterizer/kernel/animation.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #pragma once
 3 | 
 4 | #include <cstdint>
 5 | #include "../shared/ras_base.h"
 6 | 
 7 | namespace Mochimazui {
 8 | 
 9 | void vg_animation(
10 | 	int last_frame_timestamp,
11 | 	int next_frame_timestamp,
12 | 	RasterizerBase::VGInputCurveDataPack &_last_frame_curve_in,
13 | 	RasterizerBase::VGInputCurveDataPack &_next_frame_curve_in,
14 | 	RasterizerBase::VGInputPathDataPack &_last_frame_path_in,
15 | 	RasterizerBase::VGInputPathDataPack &_next_frame_path_in
16 | 	);
17 | 
18 | }
19 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/rasterizer/shared/ras_base.cu:
--------------------------------------------------------------------------------
 1 | 
 2 | #include <stdio.h>
 3 | #include <stdlib.h>
 4 | #include <assert.h>
 5 | 
 6 | #include <cuda.h>
 7 | #include <cuda_runtime.h>
 8 | 
 9 | //#include <gpu/cutil.h>
10 | 
11 | #include "ras_define.h"
12 | #include "ras_cut.h"
13 | 
14 | //#define LAUNCH(kernel,N,NT,args) {kernel <<< divup(N,NT),NT >>>args;DEBUG_CUDA_DEVICE_SYNC_AND_CHECK_ERROR(#kernel);}
15 | //#define GET_ID() (blockDim.x * blockIdx.x + threadIdx.x)
16 | 
17 | #define DEV static __device__ inline
18 | #define BOTH __device__ __host__ inline
19 | 
20 | typedef long long i64;
21 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/tiger/tiger.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef _MOCHIMAZUI_TIGER_H_
 3 | #define _MOCHIMAZUI_TIGER_H_
 4 | 
 5 | #include <mochimazui/3rd/gl_4_5_core.h>
 6 | 
 7 | namespace Mochimazui {
 8 | 
 9 | 	namespace Tiger {
10 | 
11 | 		struct TigerStyle {
12 | 			GLuint fill_color;
13 | 			GLuint stroke_color;
14 | 			GLfloat stroke_width;
15 | 		};
16 | 
17 | 		extern const char *tiger_path[240];
18 | 		extern const TigerStyle tiger_style[240];
19 | 
20 | 		extern const unsigned int tiger_path_count;
21 | 		extern GLuint tiger_path_base;
22 | 
23 | 		void initTiger();
24 | 		void drawTiger(int filling, int stroking);
25 | 
26 | 	}
27 | }
28 | 
29 | 
30 | #endif
31 | 


--------------------------------------------------------------------------------
/working_directory/shader/shared/integrate_samples.vert.glsl:
--------------------------------------------------------------------------------
 1 | 
 2 | #version 450
 3 | 
 4 | // --------------------------------
 5 | //layout(location = 0) in vec2 vertex;
 6 | 
 7 | uniform ivec2 vp_size;
 8 | 
 9 | // --------------------------------
// Emits one corner of a viewport-sized quad per vertex. gl_VertexID 0, 1 and 2
// select (0,0), (0,h) and (w,h); every other ID selects (w,0). The corner is
// then mapped from pixel coordinates to normalized device coordinates and
// written at depth 0.5.
void main() {

	float w = vp_size.x;
	float h = vp_size.y;

	vec2 corner;
	switch (gl_VertexID) {
	case 0:
		corner = vec2(0.f, 0.f);
		break;
	case 1:
		corner = vec2(0.f, h);
		break;
	case 2:
		corner = vec2(w, h);
		break;
	default:
		corner = vec2(w, 0.f);
		break;
	}

	gl_Position = vec4(
		corner.x / w * 2.0 - 1.0,
		corner.y / h * 2.0 - 1.0,
		0.5, 1);
}
35 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/thrust_impl.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef _MOCHIMAZUI_THRUST_IMPL_H_
 3 | #define _MOCHIMAZUI_THRUST_IMPL_H_
 4 | 
 5 | #include <cstdint>
 6 | 
 7 | namespace Mochimazui {
 8 | 
 9 | void thrust_exclusive_scan(int8_t *ibegin, uint32_t number, int8_t *obegin);
10 | void thrust_exclusive_scan(uint8_t *ibegin, uint32_t number, uint8_t *obegin);
11 | 
12 | void thrust_exclusive_scan(int32_t *ibegin, uint32_t number, int32_t *obegin);
13 | void thrust_exclusive_scan(uint32_t *ibegin, uint32_t number, uint32_t *obegin);
14 | 
15 | void thrust_exclusive_scan(float *ibegin, uint32_t number, float *obegin);
16 | 
17 | void thrust_inclusive_scan(int32_t *ibegin, uint32_t number, int32_t *obegin);
18 | void thrust_inclusive_scan(uint32_t *ibegin, uint32_t number, uint32_t *obegin);
19 | 
20 | }
21 | 
22 | #endif


--------------------------------------------------------------------------------
/working_directory/shader/shared/output_scale.vert.glsl:
--------------------------------------------------------------------------------
 1 | 
 2 | #version 450
 3 | 
 4 | uniform ivec2 vp_size;
 5 | 
 6 | uniform ivec2 vp_translate;
 7 | uniform float vp_scale;
 8 | 
 9 | uniform mat4x4 o_tmat;
10 | 
11 | out vec2 texcoord;
12 | 
// Corners of the unit square, indexed by gl_VertexID.
vec2 v[4] = vec2[4](
	vec2(0, 0),
	vec2(0, 1),
	vec2(1, 1),
	vec2(1, 0)
);
19 | 
// Computes texcoord for this vertex: the unit-square corner is scaled to pixel
// units, multiplied by the inverse of o_tmat, divided by its w component, and
// scaled back down by the viewport size.
void calc_texcoord() {
	vec2 viewport = vec2(vp_size);

	vec4 p = vec4(v[gl_VertexID] * viewport, 0, 1);
	p = inverse(o_tmat) * p;
	p /= p.w;

	texcoord = p.xy / viewport;
}
34 | 
// Maps this vertex's unit-square corner from [0,1] to [-1,1] and writes it as
// the clip-space position (z = 0, w = 1). No vertical flip is applied.
void calc_position() {
	vec2 corner = v[gl_VertexID];
	vec2 ndc = corner * 2 - vec2(1, 1);
	gl_Position = vec4(ndc, 0.0, 1.0);
}
40 | 
// Vertex stage entry point: writes gl_Position and texcoord. The two helpers
// write different outputs and do not depend on each other.
void main() {
	calc_position();
	calc_texcoord();
}
45 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/rvg.h:
--------------------------------------------------------------------------------
 1 | #ifndef _MOCHIMAZUI_RVG_H_
 2 | #define _MOCHIMAZUI_RVG_H_
 3 | 
 4 | #include <string>
 5 | #include <memory>
 6 | 
 7 | #include "vg_container.h"
 8 | 
 9 | namespace Mochimazui {
10 | 
11 | 	using std::string;
12 | 
13 | 	class RVG {
14 | 
15 | 	public:
16 | 		void setA128(bool f) { _a128 = f; }
17 | 		void load(const string &fileName);
18 | 
19 | 		int32_t width(){ return _viewport[1].x; }
20 | 		int32_t height() { return _viewport[1].y; }
21 | 
22 | 		const std::shared_ptr<VGContainer> &vgContainer() const { return _spVGContainer; }
23 | 
24 | 		void saveSelectedPath(const std::vector<uint32_t> &pids);
25 | 
26 | 	private:
27 | 
28 | 		bool _a128 = false;
29 | 
30 | 		glm::ivec2 _viewport[2];
31 | 		glm::ivec2 _window[2];
32 | 
33 | 		std::shared_ptr<VGContainer> _spVGContainer;
34 | 
35 | 		std::string _header;
36 | 		std::vector<std::string> _lines;
37 | 	};
38 | }
39 | 
40 | #endif
41 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/mochimazui/file.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef _MOCHIMAZUI_FILE_H_
 3 | #define _MOCHIMAZUI_FILE_H_
 4 | 
#include <array>
#include <cstdio>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>
 8 | 
 9 | #include <mochimazui/stdio_ext.h>
10 | 
11 | namespace Mochimazui {
12 | 
13 | template<class charT>
14 | void readAll(const charT *fn, std::basic_string<charT> &odata) {
15 | 
16 | 	FILE *fin;
17 | 	fin = fopen(fn, "rb");
18 | 	if (!fin) {
19 | 		auto msg = "Error in readAll: can not open file \"" + std::string(fn) + "\"";
20 | 		stdext::error_printf("%s", fn);
21 | 		throw std::runtime_error(msg);
22 | 	}
23 | 
24 | 	fseek(fin, 0, SEEK_END);
25 | 	long size = ftell(fin);
26 | 
27 | 	charT *data = new charT[size + 1];
28 | 	if (!data) { printf("Error in readAll: new char returnd 0\n"); return; }
29 | 
30 | 	fseek(fin, 0, SEEK_SET);
31 | 	size_t size_read = fread(data, 1, size, fin);
32 | 	fclose(fin);
33 | 
34 | 	data[size] = '\0';
35 | 	odata = data;
36 | }
37 | 
38 | }
39 | 
40 | #endif


--------------------------------------------------------------------------------
/gpu-scanline-path-rendering-core.sln:
--------------------------------------------------------------------------------
 1 | 
 2 | Microsoft Visual Studio Solution File, Format Version 12.00
 3 | # Visual Studio 14
 4 | VisualStudioVersion = 14.0.24720.0
 5 | MinimumVisualStudioVersion = 10.0.40219.1
 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gpu-scanline", "gpu-scanline\gpu-scanline.vcxproj", "{FD594DB2-BFC4-4F3E-BDF4-0C80C702BBF9}"
 7 | EndProject
 8 | Global
 9 | 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
10 | 		Debug|x64 = Debug|x64
11 | 		Release|x64 = Release|x64
12 | 	EndGlobalSection
13 | 	GlobalSection(ProjectConfigurationPlatforms) = postSolution
14 | 		{FD594DB2-BFC4-4F3E-BDF4-0C80C702BBF9}.Debug|x64.ActiveCfg = Debug|x64
15 | 		{FD594DB2-BFC4-4F3E-BDF4-0C80C702BBF9}.Debug|x64.Build.0 = Debug|x64
16 | 		{FD594DB2-BFC4-4F3E-BDF4-0C80C702BBF9}.Release|x64.ActiveCfg = Release|x64
17 | 		{FD594DB2-BFC4-4F3E-BDF4-0C80C702BBF9}.Release|x64.Build.0 = Release|x64
18 | 	EndGlobalSection
19 | 	GlobalSection(SolutionProperties) = preSolution
20 | 		HideSolutionNode = FALSE
21 | 	EndGlobalSection
22 | EndGlobal
23 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/rasterizer/shared/ras_factory.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #pragma once
 3 | 
 4 | #include "ras_base.h"
 5 | 
 6 | #include "rasterizer/R_cut_A_mask_comb_scanline/ras_cut_mask_comb_scanline.h"
 7 | 
 8 | #ifdef ENABLE_COMPARISON
 9 | #include "rasterizer/R_cut_A_none/ras_cut_no_aa.h"
10 | #include "rasterizer/R_cut_A_mask_sample_scanline/ras_cut_mask_sample_scanline.h"
11 | #include "rasterizer/R_cut_A_mask_pixel_scanline/ras_cut_mask_pixel_scanline.h"
12 | #include "rasterizer/c_cs_cuda_cell_list/ras_c_cs_cuda_cell_list.h"
13 | #include "rasterizer/c_cs_gl_cell_list/ras_c_cs_gl_cell_list.h"
14 | #endif
15 | 
16 | namespace Mochimazui {
17 | 
18 | inline std::shared_ptr<RasterizerBase::VGRasterizer> createRasterizer(RasterizerPipelineMode rpm) {
19 | 
20 | 	std::shared_ptr<RasterizerBase::VGRasterizer> p_ras;
21 | 
22 | 	if (rpm == PM_Cut_Mask_Comb_Scanline) {
23 | 		p_ras.reset(new Rasterizer_R_Cut_A_Mask_Comb_Scanline::VGRasterizer);
24 | 	}
25 | #ifdef ENABLE_COMPARISON
26 | #endif
27 | 	else {
28 | 		throw std::runtime_error("unsupported pipeline mode");
29 | 	}
30 | 
31 | 	return p_ras;
32 | }
33 | 
34 | }
35 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2016-2017 Zhejiang University
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/bounding_box.h:
--------------------------------------------------------------------------------
 1 | #ifndef _MOCHIMAZUI_BOUNDING_BOX_H_
 2 | #define _MOCHIMAZUI_BOUNDING_BOX_H_
 3 | 
 4 | #include <limits>
 5 | 
 6 | #include <cuda.h>
 7 | #include <cuda_runtime.h>
 8 | 
 9 | namespace Mochimazui {
10 | 
11 | 	struct BoundingBoxFloat {
12 | 
13 | 	public:
14 | 		__host__ __device__ BoundingBoxFloat() {
15 | 			v[0] = make_float2(1e32, 1e32);
16 | 			v[1] = -v[0];
17 | 		}
18 | 
19 | 		__host__ __device__ void update(const float2 &a) {
20 | 			v[0].x = min(v[0].x, a.x);
21 | 			v[0].y = min(v[0].y, a.y);
22 | 			v[1].x = max(v[1].x, a.x);
23 | 			v[1].y = max(v[1].y, a.y);
24 | 		}
25 | 
26 | 	public:
27 | 		float2 v[2];
28 | 	};
29 | 
30 | 	struct BoundingBoxInt {
31 | 
32 | 	public:
33 | 		__host__ __device__ BoundingBoxInt() {
34 | 			v[0] = make_int2(0x7FFFFFFF, 0x7FFFFFFF);
35 | 			v[1] = -v[0];
36 | 		}
37 | 
38 | 		__host__ __device__ void update(const int2 &a) {
39 | 			v[0].x = min(v[0].x, a.x);
40 | 			v[0].y = min(v[0].y, a.y);
41 | 			v[1].x = max(v[1].x, a.x);
42 | 			v[1].y = max(v[1].y, a.y);
43 | 		}
44 | 
45 | 	public:
46 | 		int2 v[2];
47 | 	};
48 | 
49 | 	typedef BoundingBoxFloat BBoxF;
50 | 	typedef BoundingBoxInt BBoxI;
51 | 
52 | }
53 | 
54 | #endif


--------------------------------------------------------------------------------
/gpu-scanline/src/rasterizer/shared/ras_pipeline_mode.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #pragma once
 3 | 
 4 | #include <string>
 5 | 
 6 | namespace Mochimazui {
 7 | 
 8 | // -------- -------- -------- -------- -------- -------- -------- --------
 9 | enum RasterizerPipelineMode {
10 | 	PM_Cut_No_AA,
11 | 	PM_Cut_Mask_Sample_Scanline,
12 | 	PM_Cut_Mask_Pixel_Scanline,
13 | 	PM_Cut_Mask_Comb_Scanline,
14 | };
15 | 
16 | // -------- -------- -------- -------- -------- -------- -------- --------
17 | typedef RasterizerPipelineMode VGPipelineMode;
18 | 
19 | // -------- -------- -------- -------- -------- -------- -------- --------
20 | inline std::string ras_pipeline_mode_to_string(RasterizerPipelineMode rpm) {	
21 | 	if (rpm == PM_Cut_No_AA) {
22 | 		return "cut fragment, no AA";
23 | 	}
24 | 	else if (rpm == PM_Cut_Mask_Sample_Scanline) {
25 | 		return "cut fragment, per sample scanline";
26 | 	}
27 | 	else if (rpm == PM_Cut_Mask_Pixel_Scanline) {
28 | 		return "cut fragment, per pixel scanline";
29 | 	}
30 | 	else if (rpm == PM_Cut_Mask_Comb_Scanline) {
31 | 		return "cut fragment, comb scanline";
32 | 	}
33 | 	else {
34 | 		throw std::runtime_error("ras_pipeline_mode_to_string: unsupported pipeline mode");
35 | 	}
36 | }
37 | 
38 | }
39 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/rasterizer/shared/ras_scan.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef _MOCHIMAZUI_RASTERIZER_SHARED_SCAN_H_
 3 | #define _MOCHIMAZUI_RASTERIZER_SHARED_SCAN_H_
 4 | 
 5 | #include <cuda_runtime.h>
 6 | #include "thrust_impl.h"
 7 | 
 8 | namespace Mochimazui {
 9 | 
10 | namespace Rasterizer {
11 | 
// Runs thrust_exclusive_scan in place over the first n + 1 ints at p, then
// copies element p[n] into *ret with a device-to-host cudaMemcpy.
// NOTE(review): the cudaMemcpy status is not checked.
inline void escan_with_ret(int* p, int n, int *ret) {
	uint32_t *buffer = (uint32_t*)p;
	thrust_exclusive_scan(buffer, n + 1, buffer);

	const int *last = p + n;
	cudaMemcpy(ret, last, sizeof(int), cudaMemcpyDeviceToHost);
}
16 | 
// In-place exclusive scan over the first n + 1 ints at p. Returns element p[n]
// of the scanned data, or 0 if the copy back to the host wrote nothing.
inline int escan(int* p, int n) {
	int total = 0;
	// Same scan-then-copy sequence as escan_with_ret, with a local as target.
	escan_with_ret(p, n, &total);
	return total;
}
23 | 
// Exclusive scan of the first n + 1 ints at i, written to o. Returns element
// o[n] of the output, fetched with a device-to-host cudaMemcpy.
// NOTE(review): the cudaMemcpy status is not checked.
inline int escan(int* i, int *o, int n) {
	uint32_t *src = (uint32_t*)i;
	uint32_t *dst = (uint32_t*)o;
	thrust_exclusive_scan(src, n + 1, dst);

	int total = 0;
	cudaMemcpy(&total, o + n, sizeof(int), cudaMemcpyDeviceToHost);
	return total;
}
30 | 
// Inclusive scan of the n ints at i, written to o. Returns the last output
// element o[n - 1], fetched with a device-to-host cudaMemcpy.
//
// For n <= 0 there is no last element: nothing is scanned or copied and 0 is
// returned. Without this guard, n == 0 would copy from o - 1 and a negative n
// would turn into a huge unsigned element count.
// NOTE(review): the cudaMemcpy status is not checked; on failure the result
// stays 0.
inline int iscan(int* i, int *o, int n) {
	int ret = 0;
	if (n <= 0) {
		return ret;
	}
	thrust_inclusive_scan((uint32_t*)i, n, (uint32_t*)o);
	cudaMemcpy(&ret, o + n - 1, sizeof(int), cudaMemcpyDeviceToHost);
	return ret;
}
37 | 
38 | } // end of namespace Rasterizer
39 | 
40 | } // end of namespace Mochimazui
41 | 
42 | #endif


--------------------------------------------------------------------------------
/gpu-scanline/src/svg.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef _MOCHIMAZUI_SVG_H_
 3 | #define _MOCHIMAZUI_SVG_H_
 4 | 
 5 | #include <cstring>
 6 | #include <cstdint>
 7 | 
 8 | #include <map>
 9 | #include <vector>
10 | #include <functional>
11 | #include <memory>
12 | 
13 | #include <mochimazui/string.h>
14 | 
15 | #include <glm/glm.hpp>
16 | 
17 | #include "vg_container.h"
18 | 
19 | #include "gradient.h"
20 | 
21 | namespace Mochimazui {
22 | 
23 | 	struct SVG {
24 | 
25 | 	public: 
26 | 		//SVG();
27 | 		//~SVG();
28 | 
29 | 	public:
30 | 
31 | 		void setA128(bool f) { _a128 = f; }
32 | 
33 | 		void load(const stdext::string &fileName, bool gen_nvpr_path_commands = false);
34 | 		void save(const stdext::string &fileName);
35 | 
36 | 		const std::shared_ptr<VGContainer> &vgContainer() const { return _spVGContainer; }
37 | 		void setVg(std::shared_ptr<VGContainer> &pVg);
38 | 
39 | 	public:
40 | 		uint32_t width() { return _width; }
41 | 		uint32_t height() { return _height; }
42 | 
43 | 	private:
44 | 
45 | 		bool _a128 = false;
46 | 
47 | 		uint32_t _width = 0, _height = 0;
48 | 		glm::vec2 _viewBox[2];
49 | 
50 | 		//std::vector<Gradient> _gradients;
51 | 		//std::map<std::string, uint32_t> _gradientMap;
52 | 
53 | 		std::shared_ptr<VGContainer> _spVGContainer;
54 | 	};
55 | 
56 | }
57 | 
58 | 
59 | #endif
60 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/mochimazui/option.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef _MOCHIMAZUI_OPTION_H_
 3 | #define _MOCHIMAZUI_OPTION_H_
 4 | 
 5 | #include <vector>
 6 | #include <unordered_map>
 7 | #include "string.h"
 8 | 
 9 | namespace Mochimazui {
10 | 
11 | enum OptionType {
12 | 	Int,
13 | 	Float,
14 | 	String,
15 | 
16 | 	IntArray,
17 | 	FloatArray,
18 | 	StringArray,
19 | };
20 | 
21 | class OptionInfo {
22 | 	std::string name;
23 | 	std::string shortcut;
24 | 	OptionType valueType;
25 | 	std::string value;
26 | };
27 | 
28 | template <class T>
29 | class OptionWithPointer {
30 | };
31 | 
32 | template <class T>
33 | class OptionWithReference {
34 | };
35 | 
36 | // -------- -------- -------- -------- -------- -------- -------- --------
37 | class Option {
38 | 
39 | public:
40 | 	Option &addOption(const std::string &name, OptionType type) {
41 | 		return *this;
42 | 	}
43 | 
44 | 	template <class T>
45 | 	Option &addOption(const std::string &name, OptionType type, T*) {
46 | 		return *this;
47 | 	}
48 | 
49 | 	template <class T>
50 | 	Option &addOption(const std::string &name, OptionType type, T&) {
51 | 		return *this;
52 | 	}
53 | 
54 | 	//Option &addOption(const std::string &name, OptionType type, ) {}
55 | 
56 | public:
57 | 	Option &operator()(int argc, char *argv[]) {}
58 | 	Option &operator()(const std::string &fileName) {}
59 | 
60 | private:
61 | 
62 | };
63 | 
64 | }
65 | 
66 | #endif
67 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/mochimazui/bitmap.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef _MOCHIMAZUI_BITMAP_H_
 3 | #define _MOCHIMAZUI_BITMAP_H_
 4 | 
 5 | #include <cstdint>
 6 | #include <string>
 7 | #include <vector>
 8 | 
 9 | #include <glm/vec4.hpp>
10 | 
11 | #include "color.h"
12 | 
13 | namespace Mochimazui {
14 | 
15 | 	struct Bitmap {
16 | 
17 | 	public:
18 | 		//Bitmap() {}
19 | 		//~Bitmap() {}
20 | 
21 | 	public:
22 | 		//void load(const std::string &fileName);
23 | 		bool save(const std::string &fileName);
24 | 
25 | 		void fill(const u8rgba &c) {
26 | 			for (uint32_t i = 0; i < _height; ++i) {
27 | 				for (uint32_t j = 0; j < _width; ++j) {
28 | 					pixel(j, i) = c;
29 | 				}
30 | 			}
31 | 		}
32 | 
33 | 		void resize(int w, int h) {
34 | 			_width = w;
35 | 			_height = h;
36 | 			_pixel.resize(h*w);
37 | 		}
38 | 
39 | 		uint32_t width() { return _width; }
40 | 		uint32_t height() { return _height; }
41 | 
42 | 		const unsigned char * data() { return (const unsigned char*)_pixel.data(); }
43 | 
44 | 	public:
45 | 
46 | 		void setPixel(int x, int y, const u8rgba &color) {
47 | 			if (0 <= x && x < (int)_width && 0 <= y && y < (int)_height) {
48 | 				_pixel[y * _width + x] = color;
49 | 			}
50 | 		}
51 | 
52 | 		u8rgba &pixel(int x, int y) {
53 | 			return _pixel[y * _width + x];
54 | 		}
55 | 
56 | 		const u8rgba &pixel(int x, int y) const {
57 | 			return _pixel[y * _width + x];
58 | 		}
59 | 
60 | 	private:
61 | 		uint32_t _width = 0;
62 | 		uint32_t _height = 0;
63 | 		std::vector<u8rgba> _pixel;
64 | 	};
65 | 
66 | }
67 | 
68 | #endif
69 | 
70 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/mochimazui/camera_2d.h:
--------------------------------------------------------------------------------
 1 | #ifndef _MOCHIMAZUI_CAMERA_2D_H_
 2 | #define _MOCHIMAZUI_CAMERA_2D_H_
 3 | 
 4 | #include <glm/glm.hpp>
 5 | #include <glm/ext.hpp>
 6 | 
 7 | namespace Mochimazui {
 8 | 
 9 | 	using glm::vec2;
10 | 	using glm::mat3x3;
11 | 
12 | 	class Camera2D {
13 | 
14 | 		friend class CameraController2D;
15 | 
16 | 	public:
17 | 		Camera2D() {
18 | 		}
19 | 
20 | 	public:
21 | 
22 | 		void reset() {
23 | 			_matrix = mat3x3();
24 | 		}
25 | 
26 | 		//
27 | 		void translate(const vec2 &t) {
28 | 			translate(t.x, t.y);
29 | 		}
30 | 
31 | 		template <class T>
32 | 		void translate(const T &x, const T &y) {
33 | 			_matrix = mat3x3(
34 | 				1, 0, 0, 
35 | 				0, 1, 0,
36 | 				x, y, 1
37 | 				) * _matrix;
38 | 		}
39 | 
40 | 		//
41 | 		template <class T>
42 | 		void scale(const T &sx, const T &sy, const vec2 &cp = vec2(0.f, 0.f)) {
43 | 			translate(-cp);
44 | 			_matrix = mat3x3(
45 | 				sx, 0, 0,
46 | 				0, sy, 0,
47 | 				0, 0, 1
48 | 				) * _matrix;
49 | 			translate(cp);
50 | 		}
51 | 
52 | 		template <class T>
53 | 		void scale(const T &s, const vec2 &cp = vec2(0.f, 0.f)) {
54 | 			scale(s, s, cp);
55 | 		}
56 | 
57 | 		void scale(const vec2 &s, const vec2 &cp = vec2(0.f, 0.f)) {
58 | 			scale(s.x, s.y, cp);
59 | 		}
60 | 
61 | 		//
62 | 		void rotate(const vec2 &c, float a) {
63 | 		}
64 | 
65 | 		//
66 | 		glm::mat3x3 matrix() {
67 | 			return _matrix;
68 | 		}
69 | 
70 | 	private:
71 | 
72 | 		vec2 _scale;
73 | 		vec2 _translate;
74 | 
75 | 		glm::mat3x3 _matrix;
76 | 	};
77 | 
78 | }
79 | 
80 | #endif


--------------------------------------------------------------------------------
/gpu-scanline/src/modern_gpu/include/kernels/cubradixsort.cuh:
--------------------------------------------------------------------------------
 1 | #include "device/deviceutil.cuh"
 2 | #include "cub/cub.cuh"
 3 | 
 4 | namespace mgpu {
 5 | 
 6 | template<typename Key>
 7 | bool CubRadixSort(Key* keys_global, Key* keys2_global, int count, int beginBit,
 8 | 	int endBit, CudaContext& context) {
 9 | 
10 | 	cub::DoubleBuffer<Key> keys(keys_global, keys2_global);
11 | 
12 | 	size_t tempBytes = 0;
13 | 	cub::DeviceRadixSort::SortKeys(0, tempBytes, keys, count, beginBit, endBit,
14 | 		context.Stream());
15 | 
16 | 	MGPU_MEM(byte) tempDevice = context.Malloc<byte>(tempBytes);
17 | 
18 | 	cub::DeviceRadixSort::SortKeys(tempDevice->get(), tempBytes, keys, count,
19 | 		beginBit, endBit, context.Stream());
20 | 	MGPU_SYNC_CHECK("cub::DeviceRadixSort::SortKeys");
21 | 
22 | 	return 1 == keys.selector;
23 | }
24 | 
// Key/value variant: sorts `count` pairs by key on bits [beginBit, endBit)
// with cub::DeviceRadixSort, using double buffers for both keys and values.
// Returns true when the key buffer selector is 1 after the sort, false when it is 0.
template<typename Key, typename Value>
bool CubRadixSort(Key* keys_global, Key* keys2_global, Value* values_global,
	Value* values2_global, int count, int beginBit, int endBit,
	CudaContext& context) {

	cub::DoubleBuffer<Key> keyBuffers(keys_global, keys2_global);
	cub::DoubleBuffer<Value> valueBuffers(values_global, values2_global);

	// First call with a null temp-storage pointer: CUB only fills in the
	// number of scratch bytes it needs.
	size_t scratchBytes = 0;
	cub::DeviceRadixSort::SortPairs(0, scratchBytes, keyBuffers, valueBuffers,
		count, beginBit, endBit, context.Stream());

	// Scratch space comes from the context's allocator.
	MGPU_MEM(byte) scratch = context.Malloc<byte>(scratchBytes);

	// Second call performs the actual sort on the context's stream.
	cub::DeviceRadixSort::SortPairs(scratch->get(), scratchBytes, keyBuffers,
		valueBuffers, count, beginBit, endBit, context.Stream());
	MGPU_SYNC_CHECK("cub::DeviceRadixSort::SortPairs");

	const bool selectorIsOne = (keyBuffers.selector == 1);
	return selectorIsOne;
}
45 | 
46 | } // namespace mgpu
47 | 


--------------------------------------------------------------------------------
/working_directory/ui/ui.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "window": {
 3 |     "id": "top", "type": "window",
 4 |     "layout": "horizontal",
 5 |     "width": 1200, "height": 1080,
 6 |     "title": "GLGUI - Test",
 7 |     "subwindows": [
 8 |       {
 9 |         "id": "left-subwindow",
10 |         "type": "frame",
11 |         "width": 128,
12 |         "size-policy-x": "fix",
13 |         "border-width": 1,
14 |         "layout": "vertical",
15 |         "subwindows": [
16 |           { "id": "frag-size", "type": "label", "text": "Fragment size:" },
17 |           { "id": "frag-size-1", "type": "radio-button", "text": "1" },
18 |           { "id": "frag-size-2", "type": "radio-button", "text": "2" },
19 |           { "id": "frag-size-4", "type": "radio-button", "text": "4" },
20 |           { "id": "frag-size-8", "type": "radio-button", "text": "8" },
21 | 
22 |           { "id": "h-line-1", "type": "horizontal-line","line-width": 2 },
23 |           { "id": "empty", "type": "label", "text": "Empty" },
24 |           { "id": "tb", "type": "push-button", "text": "Empty" },
25 |           { "id": "tcb", "type": "check-box", "text": "Empty" },
26 | 
27 | 		  { "id": "h-line-2", "type": "horizontal-line","line-width": 2 },
28 | 		  { "id": "output-label", "type": "label", "text": "Output" },
29 | 		  { "id": "default", "type": "radio-button", "text": "Default" },
30 |           { "id": "per-sample-blend", "type": "radio-button", "text": "Per sample blend" }
31 | 
32 |         ]
33 |       },
34 |       { "id": "vline", "type": "vertical-line","line-width": 2 },
35 |       { "id": "display","type": "subwindow" }
36 |     ]
37 |   },
38 | 
39 |   "subwindows": {
40 |   }
41 | }
42 | 
43 | 


--------------------------------------------------------------------------------
/working_directory/vg_default.cfg:
--------------------------------------------------------------------------------
 1 | 
 2 | // -------- -------- -------- -------- -------- -------- -------- --------
 3 | // config
 4 | 
 5 | --verbose
 6 | --minimal-ui
 7 | --show-fps
 8 | //--gl-debug
 9 | 
10 | --reserve-ink 16384
11 | //--reserve-ink 0
12 | //--draw-curve
13 | 
14 | --c-m-cs
15 | 
16 | --samples 32
17 | //--samples 8
18 | 
19 | --ms-output
20 | 
21 | --srgb
22 | //--lrgb
23 | 
24 | --window-width 1024
25 | --window-height 1024
26 | 
27 | //--animation
28 | //--window-width 1920
29 | //--window-height 1080
30 | 
31 | //--fit-to-window
32 | //--fit-to-vg
33 | 
34 | //--stroke-to-fill
35 | 
36 | //--save-output-file 
37 | //--o ./output/hehe.png
38 | 
39 | // -------- -------- -------- -------- -------- -------- -------- -------- 
40 | // RVG input
41 | 
42 | //-i ./input/rvg/embrace.rvg
43 | //-i ./input/rvg/tiger.rvg
44 | //-i ./input/rvg/reschart.rvg
45 | //-i ./input/rvg/hawaii.rvg
46 | //-i ./input/rvg/paper-1.rvg
47 | //-i ./input/rvg/chord.rvg
48 | //-i ./input/rvg/paris-30k.rvg
49 | //-i ./input/rvg/contour.rvg
50 | 
51 | //-i ./input/rvg/boston.rvg
52 | //-i ./input/rvg/paper-2.rvg
53 | //-i ./input/rvg/car.rvg
54 | //-i ./input/rvg/paris-50k.rvg
55 | //-i ./input/rvg/paris-70k.rvg
56 | 
57 | // -------- -------- -------- -------- -------- -------- -------- --------
58 | // test input
59 | 
60 | //-i ./input/quality_test/001.svg
61 | 
62 | //-i ./input/quality_test/101.svg
63 | //-i ./input/quality_test/102.svg
64 | //-i ./input/quality_test/103.svg
65 | //-i ./input/quality_test/104.svg
66 | 
67 | //-i ./input/quality_test/201.svg
68 | //-i ./input/quality_test/202.svg
69 | //-i ./input/quality_test/203.svg
70 | //-i ./input/quality_test/204.svg
71 | 
72 | //-i ./input/quality_test/301.svg
73 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/gradient.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef _MOCHIMAZUI_GRADIENT_H_
 3 | #define _MOCHIMAZUI_GRADIENT_H_
 4 | 
 5 | #include <string>
 6 | #include <vector>
 7 | 
 8 | #include <glm/glm.hpp>
 9 | 
10 | #include <mochimazui/color.h>
11 | 
12 | namespace Mochimazui {
13 | 
14 | 	enum GradientType {
15 | 		GT_Linear = 2, 
16 | 		GT_Radial = 3
17 | 	};
18 | 
19 | 	enum GradientUnits {
20 | 		USER_SPACE_ON_USE,
21 | 		OBJECT_BOUNDING_BOX
22 | 	};
23 | 
24 | 	struct GradientStop {
25 | 		float offset;
26 | 		u8rgba color;
27 | 		float opacity = 1.f;
28 | 	};
29 | 
30 | 	// Gradient-related enumerations
31 | 	enum SpreadMethod {
32 | 		PAD,      // clamp to edge
33 | 		REFLECT,  // mirror
34 | 		REPEAT,   // repeat
35 | 		NONE      // clamp to border with (0,0,0,0) border
36 | 	};
37 | 
38 | 	inline bool operator < (const GradientStop &a, const GradientStop &b) {
39 | 		return a.offset < b.offset;
40 | 	}
41 | 
42 | 	struct Gradient {
43 | 
44 | 		GradientType gradient_type;
45 | 		GradientUnits gradient_units = USER_SPACE_ON_USE;
46 | 		glm::mat3x3 gradient_transform;  // could be float4x4
47 | 		SpreadMethod spread_method = PAD;
48 | 		std::vector<GradientStop> gradient_stops;
49 | 
50 | 		std::string href;
51 | 
52 | 		// Linear gradient attributes
53 | 		glm::vec2 v1, v2;
54 | 
55 | 		// Radial gradient attributes
56 | 		glm::vec2 c;  // center
57 | 		glm::vec2 f;  // focal point
58 | 		float r;   // radius
59 | 
60 | 		bool f_set = false;
61 | 
62 | 		void clear() {
63 | 			gradient_units = USER_SPACE_ON_USE;
64 | 			gradient_transform = glm::mat3x3();
65 | 			spread_method = PAD;
66 | 			gradient_stops.clear();
67 | 			href.clear();
68 | 
69 | 			f_set = false;
70 | 			v1 = v2 = c = f = glm::vec2();
71 | 			r = 0.f;
72 | 		}
73 | 
74 | 	};
75 | 
76 | }
77 | 
78 | #endif
79 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/mochimazui/camera_controller_2d.h:
--------------------------------------------------------------------------------
 1 | #ifndef _MOCHIMAZUI_CAMERA_CONTROLLER_2D_
 2 | #define _MOCHIMAZUI_CAMERA_CONTROLLER_2D_
 3 | 
 4 | #include <map>
 5 | 
 6 | #include <glm/glm.hpp>
 7 | #include <glm/ext.hpp>
 8 | 
 9 | #include "camera_2d.h"
10 | 
11 | namespace Mochimazui {
12 | 
13 | 	using glm::vec2;
14 | 	using glm::ivec2;
15 | 	using std::map;
16 | 
17 | 	class CameraController2D : public Camera2D {
18 | 
19 | 	public:
20 | 
21 | 		void leftButtonDown(int x, int y) {
22 | 			_leftButton = true;
23 | 			_lastPos = _leftButtonClickPos = ivec2(x, y);
24 | 		}
25 | 
26 | 		void leftButtonUp(int x, int y) {
27 | 			_leftButton = false;
28 | 		}
29 | 
30 | 		void rightButtonDown(int x, int y) {
31 | 			_rightButton = true;
32 | 			_lastPos = _rightButtonClickPos = ivec2(x, y);
33 | 		}
34 | 
35 | 		void rightButtonUp(int x, int y) {
36 | 			_rightButton = false;
37 | 		}
38 | 
39 | 		void wheel(float dy) {
40 | 			wheel(0.f, dy);
41 | 		}
42 | 
43 | 		void wheel(float dx, float dy) {
44 | 			if (dy > 0) {
45 | 				scale(1.1f, _lastPos);
46 | 			}
47 | 			else {
48 | 				scale(0.9f, _lastPos);
49 | 			}			
50 | 		}
51 | 
52 | 		void move(int x, int y) {
53 | 			ivec2 cp(x, y);
54 | 			if (_leftButton || _rightButton) {
55 | 				auto delta = cp - _lastPos;
56 | 				Camera2D::translate(delta);
57 | 			}
58 | 			_lastPos = cp;
59 | 		}
60 | 
61 | 		void keyDown(int key) {
62 | 			_keyMap[key] = true;
63 | 		}
64 | 
65 | 		void keyUp(int key) {
66 | 			_keyMap[key] = false;
67 | 		}
68 | 
69 | 	private:
70 | 
71 | 	private:
72 | 
73 | 		bool _leftButton = false;
74 | 		bool _rightButton = false;
75 | 
76 | 		ivec2 _leftButtonClickPos;
77 | 		ivec2 _rightButtonClickPos;
78 | 
79 | 		ivec2 _lastPos;
80 | 
81 | 		map<int, bool> _keyMap;
82 | 	};
83 | }
84 | 
85 | #endif


--------------------------------------------------------------------------------
/working_directory/shader/shared/integrate_samples.frag.glsl:
--------------------------------------------------------------------------------
 1 | 
 2 | #version 450
 3 | 
 4 | layout(binding = 0) uniform sampler2DMS tex_msaa;
 5 | 
 6 | uniform bool enable_srgb_correction;
 7 | uniform int samples;
 8 | 
 9 | layout(location = 0) out vec4 out_color;
10 | 
11 | // -------- -------- -------- -------- -------- -------- -------- --------
// Converts one linear-light channel value to its sRGB-encoded value:
// a linear segment up to 0.0031308, a power curve above it.
float lrgb_to_srgb_f(float f) {
	return (f <= 0.0031308f)
		? 12.92f*f
		: (1.f + 0.055f)*pow(f, 1.f / 2.4f) - 0.055f;
}
16 | 
// Applies lrgb_to_srgb_f to r, g and b; alpha is passed through unchanged.
vec4 lrgb_to_srgb(const vec4 c) {
	vec3 rgb = vec3(lrgb_to_srgb_f(c.r), lrgb_to_srgb_f(c.g), lrgb_to_srgb_f(c.b));
	return vec4(rgb, c.a);
}
25 | 
26 | // -------- -------- -------- -------- -------- -------- -------- --------
// Converts one sRGB-encoded channel value to linear light:
// a linear segment up to 0.04045, a power curve above it.
float srgb_to_lrgb_f(float f) {
	return (f <= 0.04045f)
		? f / 12.92f
		: pow((f + 0.055f) / (1.f + 0.055f), 2.4f);
}
31 | 
// Applies srgb_to_lrgb_f to r, g and b; alpha is passed through unchanged.
vec4 srgb_to_lrgb(vec4 c) {
	vec3 rgb = vec3(srgb_to_lrgb_f(c.r), srgb_to_lrgb_f(c.g), srgb_to_lrgb_f(c.b));
	return vec4(rgb, c.a);
}
40 | 
41 | // -------- -------- -------- -------- -------- -------- -------- --------
// Resolves the multisample texture: averages `samples` samples of the texel
// under this fragment and writes the result to out_color.
void main() {

	ivec2 texel = ivec2(gl_FragCoord.xy);
	vec4 sum = vec4(0.0);

	// Each sample contributes with equal weight.
	float weight = 1.f / float(samples);

	// The enable_srgb_correction uniform is intentionally not consulted here:
	// samples are always decoded to linear light, averaged, and re-encoded.
	// Set this to false to average the stored values directly instead.
	const bool average_in_linear = true;

	for (int s = 0; s < samples; ++s) {
		vec4 sample_color = texelFetch(tex_msaa, texel, s);
		if (average_in_linear) {
			sample_color = srgb_to_lrgb(sample_color);
		}
		sum += sample_color*weight;
	}

	if (average_in_linear) {
		sum = lrgb_to_srgb(sum);
	}
	out_color = sum;
}
67 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/thrust_impl_scan.cu:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "thrust_impl.h"
 3 | 
 4 | #pragma warning( push, 0 ) 
 5 | #include <thrust/system/cuda/execution_policy.h>
 6 | #include <thrust/execution_policy.h>
 7 | #include <thrust/scan.h>
 8 | #include <thrust/device_vector.h>
 9 | #pragma warning( pop )
10 | 
11 | #include "cuda/cuda_cached_allocator.h"
12 | 
namespace Mochimazui {

namespace {

// Shared body of every thrust_exclusive_scan overload: scans
// [ibegin, ibegin + number) into obegin under the CUDA execution policy
// backed by g_alloc. The initial value is a zero of the element type T
// rather than the int literal 0, so the init argument can never pull the
// scan's intermediate type away from T (this matters for float input,
// where an int accumulator would truncate).
template <class T>
inline void exclusive_scan_impl(T *ibegin, uint32_t number, T *obegin) {
	thrust::exclusive_scan(thrust::cuda::par(g_alloc),
		ibegin, ibegin + number, obegin, T(0));
}

// Shared body of every thrust_inclusive_scan overload.
template <class T>
inline void inclusive_scan_impl(T *ibegin, uint32_t number, T *obegin) {
	thrust::inclusive_scan(thrust::cuda::par(g_alloc),
		ibegin, ibegin + number, obegin);
}

} // namespace

// All wrappers below take `number` elements starting at ibegin and write
// `number` results starting at obegin.
// NOTE(review): the pointers are presumably device pointers, since the scan
// runs under the CUDA policy -- confirm against callers.

// -------- -------- -------- -------- -------- -------- -------- --------
void thrust_exclusive_scan(int8_t *ibegin, uint32_t number, int8_t *obegin) {
	exclusive_scan_impl(ibegin, number, obegin);
}

void thrust_exclusive_scan(uint8_t *ibegin, uint32_t number, uint8_t *obegin) {
	exclusive_scan_impl(ibegin, number, obegin);
}

// -------- -------- -------- -------- -------- -------- -------- --------
void thrust_exclusive_scan(int32_t *ibegin, uint32_t number, int32_t *obegin) {
	exclusive_scan_impl(ibegin, number, obegin);
}

void thrust_exclusive_scan(uint32_t *ibegin, uint32_t number, uint32_t *obegin) {
	exclusive_scan_impl(ibegin, number, obegin);
}

// -------- -------- -------- -------- -------- -------- -------- --------
void thrust_exclusive_scan(float *ibegin, uint32_t number, float *obegin) {
	exclusive_scan_impl(ibegin, number, obegin);
}

// -------- -------- -------- -------- -------- -------- -------- --------
void thrust_inclusive_scan(int32_t *ibegin, uint32_t number, int32_t *obegin) {
	inclusive_scan_impl(ibegin, number, obegin);
}

void thrust_inclusive_scan(uint32_t *ibegin, uint32_t number, uint32_t *obegin) {
	inclusive_scan_impl(ibegin, number, obegin);
}

}
55 | 


--------------------------------------------------------------------------------
/working_directory/shader/R_cut_A_stencil/output_8.frag.glsl:
--------------------------------------------------------------------------------
 1 | 
 2 | #version 430
 3 | 
 4 | // -------- -------- -------- --------
 5 | layout(binding = 4) uniform samplerBuffer tex_table;
 6 | layout(binding = 5) uniform sampler2D tex_ramp;
 7 | layout(binding = 6) uniform sampler2DMS path_frag_tex;
 8 | 
 9 | uniform bool enable_srgb_correction;
10 | 
11 | // -------- -------- -------- --------
12 | flat in vec4 fragment_color;
13 | 
14 | flat in ivec2 path_frag_pos;
15 | 
16 | flat in int pixel_mask;
17 | 
18 | // -------- -------- -------- -------- -------- -------- -------- --------
19 | in vec3 gradient_coord_0;
20 | in vec3 gradient_coord_1;
21 | flat in vec3 gradient_ramp_coord;
22 | flat in vec3 gradient_focal_point;
23 | 
24 | // -------- -------- -------- --------
25 | layout(location = 0) out vec4 out_color;
26 | 
27 | // -------- -------- -------- --------
// Reciprocal of a for a > 1e-6; returns 0 for anything at or below that.
float safeRcpP(float a) {
	if (a > 1e-6) {
		return 1.0 / a;
	}
	return 0.0;
}
29 | 
30 | // -------- -------- -------- -------- -------- -------- -------- --------
// Converts one sRGB-encoded channel value to linear light:
// a linear segment up to 0.04045, a power curve above it.
float srgb_to_lrgb_f(float f) {
	return (f <= 0.04045f)
		? f / 12.92f
		: pow((f + 0.055f) / (1.f + 0.055f), 2.4f);
}
35 | 
// Applies srgb_to_lrgb_f to r, g and b; alpha is passed through unchanged.
vec4 srgb_to_lrgb(vec4 c) {
	vec3 rgb = vec3(srgb_to_lrgb_f(c.r), srgb_to_lrgb_f(c.g), srgb_to_lrgb_f(c.b));
	return vec4(rgb, c.a);
}
39 | 
40 | // -------- -------- -------- --------
// Shades one pixel of a path fragment: picks a solid or ramp colour, then
// scales alpha by the fraction of set bits in this pixel's 8-bit mask.
void main() {

	// Offset of this pixel from the fragment origin.
	// NOTE(review): presumably each component is 0 or 1 (four 8-bit masks
	// packed into one int) -- confirm against the vertex stage.
	ivec2 local_pos = ivec2(gl_FragCoord.xy) - path_frag_pos;

	if (gradient_ramp_coord.z > 0.0) {
		// Row 0 and row 1 of the fragment carry separate gradient coordinates.
		vec3 grad_pos = (local_pos.y == 0) ? gradient_coord_0 : gradient_coord_1;
		vec2 delta = grad_pos.xy - gradient_focal_point.xy;
		float a = dot(delta, delta);
		float b = dot(delta, gradient_focal_point.xy);
		// Ramp parameter, clamped to at most 1.
		float t = min(a*safeRcpP(sqrt(b*b + a*gradient_focal_point.z) - b), 1.0);
		vec2 ramp_uv = gradient_ramp_coord.xy + vec2(t*gradient_ramp_coord.z, 0.0);
		out_color = textureLod(tex_ramp, ramp_uv, 0);
	}
	else {
		out_color = fragment_color;
	}

	// Select this pixel's byte of pixel_mask and count its set bits.
	int byte_index = local_pos.x * 2 + local_pos.y;
	int coverage_bits = (pixel_mask >> (byte_index * 8)) & 0xFF;
	out_color.a *= bitCount(coverage_bits) / 8.0;
}
60 | 


--------------------------------------------------------------------------------
/working_directory/shader/R_cut_A_stencil/ms_output_32.frag.glsl:
--------------------------------------------------------------------------------
 1 | 
 2 | #version 430
 3 | 
 4 | // -------- -------- -------- --------
 5 | layout(binding = 4) uniform samplerBuffer tex_table;
 6 | layout(binding = 5) uniform sampler2D tex_ramp;
 7 | layout(binding = 6) uniform sampler2DMS path_frag_tex;
 8 | 
 9 | uniform bool enable_srgb_correction;
10 | 
11 | // -------- -------- -------- --------
12 | flat in vec4 fragment_color;
13 | 
14 | flat in ivec2 path_frag_pos;
15 | 
16 | flat in ivec4 pixel_mask;
17 | 
18 | // -------- -------- -------- -------- -------- -------- -------- --------
19 | in vec3 gradient_coord_0;
20 | in vec3 gradient_coord_1;
21 | flat in vec3 gradient_ramp_coord;
22 | flat in vec3 gradient_focal_point;
23 | 
24 | // -------- -------- -------- --------
25 | layout(location = 0) out vec4 out_color;
26 | 
27 | // -------- -------- -------- --------
// Reciprocal of a for a > 1e-6; returns 0 for anything at or below that.
float safeRcpP(float a) {
	if (a > 1e-6) {
		return 1.0 / a;
	}
	return 0.0;
}
29 | 
30 | // -------- -------- -------- -------- -------- -------- -------- --------
// Converts one sRGB-encoded channel value to linear light:
// a linear segment up to 0.04045, a power curve above it.
float srgb_to_lrgb_f(float f) {
	return (f <= 0.04045f)
		? f / 12.92f
		: pow((f + 0.055f) / (1.f + 0.055f), 2.4f);
}
35 | 
// Applies srgb_to_lrgb_f to r, g and b; alpha is passed through unchanged.
vec4 srgb_to_lrgb(vec4 c) {
	vec3 rgb = vec3(srgb_to_lrgb_f(c.r), srgb_to_lrgb_f(c.g), srgb_to_lrgb_f(c.b));
	return vec4(rgb, c.a);
}
39 | 
40 | // -------- -------- -------- --------
// Shades one pixel of a path fragment: picks a solid or ramp colour, then
// writes this pixel's 32-bit mask to gl_SampleMask[0].
void main() {

	// Offset of this pixel from the fragment origin.
	ivec2 in_frag_pos = ivec2(gl_FragCoord.xy) - path_frag_pos;

	if (gradient_ramp_coord.z > 0.0) {
		// Row 0 and row 1 of the fragment carry separate gradient coordinates.
		vec3 gradient_coord = in_frag_pos.y == 0 ? gradient_coord_0 : gradient_coord_1;
		vec2 d = gradient_coord.xy - gradient_focal_point.xy;
		float A = dot(d, d), B = dot(d, gradient_focal_point.xy);
		// Ramp parameter, clamped to at most 1.
		float c = min(A*safeRcpP(sqrt(B*B + A*gradient_focal_point.z) - B), 1.0);
		out_color = textureLod(tex_ramp, gradient_ramp_coord.xy + vec2(c*gradient_ramp_coord.z, 0.0), 0);
	}
	else {
		out_color = fragment_color;
	}

	// The unused sub_pixel_index local was removed; it had no effect on the output.

	// One component of pixel_mask per pixel of the fragment.
	int mask_index = in_frag_pos.x * 2 + in_frag_pos.y;
	gl_SampleMask[0] = pixel_mask[mask_index];
}
61 | 


--------------------------------------------------------------------------------
/working_directory/shader/R_cut_A_stencil/ms_output_8.frag.glsl:
--------------------------------------------------------------------------------
 1 | 
 2 | #version 430
 3 | 
 4 | // -------- -------- -------- --------
 5 | layout(binding = 4) uniform samplerBuffer tex_table;
 6 | layout(binding = 5) uniform sampler2D tex_ramp;
 7 | layout(binding = 6) uniform sampler2DMS path_frag_tex;
 8 | 
 9 | uniform bool enable_srgb_correction;
10 | 
11 | // -------- -------- -------- --------
12 | flat in vec4 fragment_color;
13 | 
14 | flat in ivec2 path_frag_pos;
15 | 
16 | flat in int pixel_mask;
17 | 
18 | // -------- -------- -------- -------- -------- -------- -------- --------
19 | in vec3 gradient_coord_0;
20 | in vec3 gradient_coord_1;
21 | flat in vec3 gradient_ramp_coord;
22 | flat in vec3 gradient_focal_point;
23 | 
24 | // -------- -------- -------- --------
25 | layout(location = 0) out vec4 out_color;
26 | 
27 | // -------- -------- -------- --------
// Reciprocal of a for a > 1e-6; returns 0 for anything at or below that.
float safeRcpP(float a) {
	if (a > 1e-6) {
		return 1.0 / a;
	}
	return 0.0;
}
29 | 
30 | // -------- -------- -------- -------- -------- -------- -------- --------
// Converts one sRGB-encoded channel value to linear light:
// a linear segment up to 0.04045, a power curve above it.
float srgb_to_lrgb_f(float f) {
	return (f <= 0.04045f)
		? f / 12.92f
		: pow((f + 0.055f) / (1.f + 0.055f), 2.4f);
}
35 | 
// Applies srgb_to_lrgb_f to r, g and b; alpha is passed through unchanged.
vec4 srgb_to_lrgb(vec4 c) {
	vec3 rgb = vec3(srgb_to_lrgb_f(c.r), srgb_to_lrgb_f(c.g), srgb_to_lrgb_f(c.b));
	return vec4(rgb, c.a);
}
39 | 
40 | // -------- -------- -------- --------
// Shades one pixel of a path fragment: picks a solid or ramp colour, then
// writes this pixel's 8-bit mask to gl_SampleMask[0].
void main() {

	// Offset of this pixel from the fragment origin.
	ivec2 in_frag_pos = ivec2(gl_FragCoord.xy) - path_frag_pos;

	if (gradient_ramp_coord.z > 0.0) {
		// Row 0 and row 1 of the fragment carry separate gradient coordinates.
		vec3 gradient_coord = in_frag_pos.y == 0 ? gradient_coord_0 : gradient_coord_1;
		vec2 d = gradient_coord.xy - gradient_focal_point.xy;
		float A = dot(d, d), B = dot(d, gradient_focal_point.xy);
		// Ramp parameter, clamped to at most 1.
		float c = min(A*safeRcpP(sqrt(B*B + A*gradient_focal_point.z) - B), 1.0);
		out_color = textureLod(tex_ramp, gradient_ramp_coord.xy + vec2(c*gradient_ramp_coord.z, 0.0), 0);
	}
	else {
		out_color = fragment_color;
	}

	// The unused sub_pixel_index local was removed; it had no effect on the output.

	// Each pixel of the fragment owns one byte of pixel_mask.
	int mask_shift = (in_frag_pos.x * 2 + in_frag_pos.y) * 8;

	gl_SampleMask[0] = (pixel_mask >> mask_shift) & 0xFF;
}
62 | 


--------------------------------------------------------------------------------
/working_directory/shader/R_cut_A_stencil/output_32.frag.glsl:
--------------------------------------------------------------------------------
 1 | 
 2 | #version 430
 3 | 
 4 | // -------- -------- -------- --------
 5 | layout(binding = 4) uniform samplerBuffer tex_table;
 6 | layout(binding = 5) uniform sampler2D tex_ramp;
 7 | layout(binding = 6) uniform sampler2DMS path_frag_tex;
 8 | 
 9 | uniform bool enable_srgb_correction;
10 | 
11 | // -------- -------- -------- --------
12 | flat in vec4 fragment_color;
13 | 
14 | flat in ivec2 path_frag_pos;
15 | 
16 | flat in ivec4 pixel_mask;
17 | 
18 | // -------- -------- -------- -------- -------- -------- -------- --------
19 | in vec3 gradient_coord_0;
20 | in vec3 gradient_coord_1;
21 | flat in vec3 gradient_ramp_coord;
22 | flat in vec3 gradient_focal_point;
23 | 
24 | // -------- -------- -------- --------
25 | layout(location = 0) out vec4 out_color;
26 | 
27 | // -------- -------- -------- --------
// Reciprocal of a for a > 1e-6; returns 0 for anything at or below that.
float safeRcpP(float a) {
	if (a > 1e-6) {
		return 1.0 / a;
	}
	return 0.0;
}
29 | 
30 | // -------- -------- -------- -------- -------- -------- -------- --------
// Converts one sRGB-encoded channel value to linear light:
// a linear segment up to 0.04045, a power curve above it.
float srgb_to_lrgb_f(float f) {
	return (f <= 0.04045f)
		? f / 12.92f
		: pow((f + 0.055f) / (1.f + 0.055f), 2.4f);
}
35 | 
// Applies srgb_to_lrgb_f to r, g and b; alpha is passed through unchanged.
vec4 srgb_to_lrgb(vec4 c) {
	vec3 rgb = vec3(srgb_to_lrgb_f(c.r), srgb_to_lrgb_f(c.g), srgb_to_lrgb_f(c.b));
	return vec4(rgb, c.a);
}
39 | 
40 | // -------- -------- -------- --------
// Shades one pixel of a path fragment: picks a solid or ramp colour, then
// scales alpha by the fraction of set bits in this pixel's 32-bit mask.
void main() {

	// Offset of this pixel from the fragment origin.
	ivec2 in_frag_pos = ivec2(gl_FragCoord.xy) - path_frag_pos;

	if (gradient_ramp_coord.z > 0.0) {
		// Row 0 and row 1 of the fragment carry separate gradient coordinates.
		vec3 gradient_coord = in_frag_pos.y == 0 ? gradient_coord_0 : gradient_coord_1;
		vec2 d = gradient_coord.xy - gradient_focal_point.xy;
		float A = dot(d, d), B = dot(d, gradient_focal_point.xy);
		// Ramp parameter, clamped to at most 1.
		float c = min(A*safeRcpP(sqrt(B*B + A*gradient_focal_point.z) - B), 1.0);
		out_color = textureLod(tex_ramp, gradient_ramp_coord.xy + vec2(c*gradient_ramp_coord.z, 0.0), 0);
	}
	else {
		out_color = fragment_color;
	}

	// The unused sub_pixel_index local was removed; it had no effect on the output.

	// One component of pixel_mask per pixel of the fragment.
	int mask_index = in_frag_pos.x * 2 + in_frag_pos.y;

	int count = bitCount(pixel_mask[mask_index]);

	out_color.a *= count / 32.0;
}
64 | 


--------------------------------------------------------------------------------
/working_directory/shader/shared/fps.frag.glsl:
--------------------------------------------------------------------------------
 1 | 
 2 | #version 450
 3 | 
 4 | uniform ivec2 vp_size;
 5 | uniform int fps;
 6 | 
 7 | #define SIZE 8
 8 | 
 9 | layout(location = 0) out vec4 color; 
10 | 
11 | int fps_mask[3] = {
12 | 	0x13CF, // 001 0011 1100 1111
13 | 	0x13EF, // 001 0011 1110 1111
14 | 	0x79CF // 111 1001 1100 1111
15 | };
16 | 
17 | int number_mask[10] = {
18 | 	0x7B6F, // 111 1011 0110 1111
19 | 	0x4924, // 100 1001 0010 0100
20 | 	0x73E7, // 111 0011 1110 0111
21 | 	0x79E7, // 111 1001 1110 0111
22 | 	0x49ED, // 100 1001 1110 1101
23 | 	0x79CF, // 111 1001 1100 1111
24 | 	0x7BCF, // 111 1011 1100 1111
25 | 	0x4927, // 100 1001 0010 0111
26 | 	0x7BEF, // 111 1011 1110 1111
27 | 	0x79EF  // 111 1001 1110 1111
28 | };
29 | 
30 | int mask = 0;
31 | 
// Tests whether pixel (x, y) of the current glyph cell is lit. The cell is
// 3 x 5 squares of SIZE pixels each; bit (row * 3 + column) of the global
// `mask` says whether that square is on. Coordinates outside the cell are
// never lit.
bool check(int x, int y) {
	bool inside = (x >= 0) && (x < (SIZE * 3)) && (y >= 0) && (y < (SIZE * 5));
	if (!inside) {
		return false;
	}
	int column = x / SIZE;
	int row = y / SIZE;
	int bit = (mask >> (row * 3 + column)) & 1;
	return bit == 1;
}
40 | 
// Draws the frame-rate overlay: up to three digits followed by three glyphs
// taken from fps_mask, in cells SIZE*4 pixels wide, starting SIZE*2 pixels
// from the left and top of the viewport. Lit pixels are white; lit pixels
// with an unlit neighbour within 2 pixels are black; everything else is
// discarded.
void main() {

	vec2 pos;

	if (gl_FragCoord.x < SIZE * 2) { discard; }

	// Position relative to the overlay origin, with y measured downwards.
	pos.x = gl_FragCoord.x - SIZE * 2;
	pos.y = vp_size.y - gl_FragCoord.y - SIZE * 2;

	// 1-based index of the character cell, then x within that cell.
	int char_index = int((pos.x / (SIZE * 4)) + 1);
	pos.x = mod(pos.x, (SIZE * 4));

	if (pos.y < 0 || pos.y > (SIZE * 5) || pos.x < 0 || pos.x > SIZE * 3) { discard; }

	color = vec4(1, 1, 1, 1);

	// Only three digits can be shown. Clamping keeps every number_mask index
	// inside 0..9: the unclamped value indexed past the array for
	// fps >= 1000 and below it for negative fps.
	int shown_fps = clamp(fps, 0, 999);

	if (char_index > 4 && char_index <= 7) {
		mask = fps_mask[char_index - 5];
	}
	else if (char_index <= 3) {
		// Leading digits are left blank (mask keeps its initial 0) when the
		// value is too small to need them.
		if (char_index == 1 && shown_fps >= 100) {
			mask = number_mask[shown_fps / 100];
		}
		if (char_index == 2 && shown_fps >= 10) {
			mask = number_mask[(shown_fps / 10) % 10];
		}
		if (char_index == 3) {
			mask = number_mask[shown_fps % 10];
		}
	}
	else {
		mask = 0;
	}

	if (check(int(pos.x), int(pos.y))) {

		// Interior test: stays true only if every pixel of the 5x5
		// neighbourhood is lit as well.
		bool flag = true;

		for (int dx = -2; dx <= 2; ++dx) {
			for (int dy = -2; dy <= 2; ++dy) {
				flag = flag && check(
					int(pos.x) + dx, int(pos.y) + dy);
			}
		}

		if (!flag) {
			color = vec4(0, 0, 0, 1);
		}
	}
	else {
		discard;
	}
}
94 | 


--------------------------------------------------------------------------------
/working_directory/shader/R_cut_A_stencil/ms_output_32.frag.glsl.before_368.22:
--------------------------------------------------------------------------------
 1 | 
 2 | #version 430
 3 | 
 4 | // -------- -------- -------- --------
 5 | layout(binding = 4) uniform samplerBuffer tex_table;
 6 | layout(binding = 5) uniform sampler2D tex_ramp;
 7 | layout(binding = 6) uniform sampler2DMS path_frag_tex;
 8 | 
 9 | uniform bool enable_srgb_correction;
10 | 
11 | // -------- -------- -------- --------
12 | flat in vec4 fragment_color;
13 | 
14 | flat in ivec2 path_frag_pos;
15 | 
16 | flat in ivec4 pixel_mask;
17 | 
18 | // -------- -------- -------- -------- -------- -------- -------- --------
19 | in vec3 gradient_coord_0;
20 | in vec3 gradient_coord_1;
21 | flat in vec3 gradient_ramp_coord;
22 | flat in vec3 gradient_focal_point;
23 | 
24 | // -------- -------- -------- --------
25 | layout(location = 0) out vec4 out_color;
26 | 
27 | // -------- -------- -------- --------
// Reciprocal of a for a > 1e-6; returns 0 for anything at or below that.
float safeRcpP(float a) {
	if (a > 1e-6) {
		return 1.0 / a;
	}
	return 0.0;
}
29 | 
30 | // -------- -------- -------- -------- -------- -------- -------- --------
// Converts one sRGB-encoded channel value to linear light:
// a linear segment up to 0.04045, a power curve above it.
float srgb_to_lrgb_f(float f) {
	return (f <= 0.04045f)
		? f / 12.92f
		: pow((f + 0.055f) / (1.f + 0.055f), 2.4f);
}
35 | 
// Applies srgb_to_lrgb_f to r, g and b; alpha is passed through unchanged.
vec4 srgb_to_lrgb(vec4 c) {
	vec3 rgb = vec3(srgb_to_lrgb_f(c.r), srgb_to_lrgb_f(c.g), srgb_to_lrgb_f(c.b));
	return vec4(rgb, c.a);
}
39 | 
40 | // -------- -------- -------- --------
// Shades one pixel of a path fragment: picks a solid or ramp colour, then
// writes this pixel's mask, shifted by a sub-pixel dependent amount, to
// gl_SampleMask[0].
void main() {

	// Offset of this pixel from the fragment origin.
	ivec2 local_pos = ivec2(gl_FragCoord.xy) - path_frag_pos;

	if (gradient_ramp_coord.z > 0.0) {
		// Row 0 and row 1 of the fragment carry separate gradient coordinates.
		vec3 grad_pos = (local_pos.y == 0) ? gradient_coord_0 : gradient_coord_1;
		vec2 delta = grad_pos.xy - gradient_focal_point.xy;
		float a = dot(delta, delta);
		float b = dot(delta, gradient_focal_point.xy);
		// Ramp parameter, clamped to at most 1.
		float t = min(a*safeRcpP(sqrt(b*b + a*gradient_focal_point.z) - b), 1.0);
		vec2 ramp_uv = gradient_ramp_coord.xy + vec2(t*gradient_ramp_coord.z, 0.0);
		out_color = textureLod(tex_ramp, ramp_uv, 0);
	}
	else {
		out_color = fragment_color;
	}

	// Which half of the pixel (per axis) the fragment coordinate falls in.
	ivec2 half_index = ivec2(mod(gl_FragCoord.xy, 1) * 2);

	// One component of pixel_mask per pixel; one byte per half-pixel quadrant.
	int component = local_pos.x * 2 + local_pos.y;
	int shift = (half_index.x * 2 + half_index.y) * 8;

	gl_SampleMask[0] = pixel_mask[component] >> shift;
}
63 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/mochimazui/camera_controller_3d.h:
--------------------------------------------------------------------------------
  1 | #ifndef _MOCHIMAZUI_CAMERA_CONTROLLER_3D_
  2 | #define _MOCHIMAZUI_CAMERA_CONTROLLER_3D_
  3 | 
  4 | #include <map>
  5 | 
  6 | #include <glm/glm.hpp>
  7 | #include <glm/ext.hpp>
  8 | 
  9 | #include "camera_3d.h"
 10 | 
 11 | namespace Mochimazui {
 12 | 
 13 | 	using glm::vec2;
 14 | 	using glm::ivec2;
 15 | 	using std::map;
 16 | 
 17 | 	enum CameraControllerMode { // stored as a plain int in CameraController3D::_controllerMode
 18 | 		CCM_NULL = 0,
 19 | 		CCM_MOVE = 1,
 20 | 		CCM_TURN = 2,
 21 | 		CCM_ROTATE = 3,
 22 | 		CCM_WALK = 4,
 23 | 	};
 24 | 
 25 | 	class CameraController3D : public Camera3D {
 26 | 		// Camera3D plus input state (mouse buttons, key map, controller mode) and event entry points.
 27 | 	public:
 28 | 		void setControllerMode(int nm) { // nm is stored unchecked; _controllerMode defaults to CCM_MOVE
 29 | 			_controllerMode = nm;
 30 | 		}
 31 | 
 32 | 	public:
 33 | 		// Declared here, defined out of line.
 34 | 		void init(int width, int height);
 35 | 		void fitToView(int vWidth, int vHeight);
 36 | 
 37 | 	public:
 38 | 		void leftButtonDown(int x, int y);
 39 | 		void leftButtonUp(int x, int y);
 40 | 
 41 | 		void rightButtonDown(int x, int y);
 42 | 		void rightButtonUp(int x, int y);
 43 | 
 44 | 		void move(int x, int y);
 45 | 		// Single-axis wheel event: forwards to wheel(dx, dy) with dx = 0.
 46 | 		void wheel(float dy) {
 47 | 			wheel(0.f, dy);
 48 | 		}
 49 | 
 50 | 		// Scales the view about _lastPos: by 1.05 for dy > 0 and by 0.95 for dy < 0.
 51 | 		// dx is accepted for interface symmetry but not used.
 52 | 		void wheel(float dx, float dy) {
 53 | 			(void)dx;
 54 | 			if (dy == 0.f) { return; } // a zero vertical delta is not a zoom request
 55 | 			const float factor = dy > 0 ? 1.05f : 0.95f;
 56 | 			scale(factor, glm::vec3(_lastPos.x, _lastPos.y, 0.0f));
 57 | 		}
 58 | 
 59 | 		void keyDown(int key) { // records key as held
 60 | 			_keyMap[key] = true;
 61 | 		}
 62 | 
 63 | 		void keyUp(int key) { // records key as released; the entry stays in the map
 64 | 			_keyMap[key] = false;
 65 | 		}
 66 | 
 67 | 		glm::mat4x4 modelViewMatrix();
 68 | 		glm::mat4x4 projectionMatrix();
 69 | 
 70 | 	private:
 71 | 
 72 | 		void handleMove(int x, int y);
 73 | 		void handleTurn(int x, int y);
 74 | 		void handleRotate(int x, int y);
 75 | 
 76 | 	public:
 77 | 		// Every member carries an initializer so a freshly constructed controller has no indeterminate state.
 78 | 		int _controllerMode = CCM_MOVE;
 79 | 
 80 | 		bool _leftButton = false;
 81 | 		bool _rightButton = false;
 82 | 
 83 | 		ivec2 _leftButtonClickPos = ivec2(0, 0);
 84 | 		ivec2 _rightButtonClickPos = ivec2(0, 0);
 85 | 
 86 | 		ivec2 _lastPos = ivec2(0, 0); // centre point used by wheel()
 87 | 
 88 | 		int _sceneWidth = 0;
 89 | 		int _sceneHeight = 0;
 90 | 
 91 | 		map<int, bool> _keyMap;
 92 | 
 93 | 		glm::vec3 _rotateCenter = glm::vec3(0.f);
 94 | 		glm::vec3 _rotateNormal = glm::vec3(0.f);
 95 | 		glm::vec3 _walky = glm::vec3(0.f);
 96 | 		glm::vec3 _walkx = glm::vec3(0.f);
 97 | 
 98 | 	};
 99 | }
100 | 
101 | #endif


--------------------------------------------------------------------------------
/gpu-scanline/src/mochimazui/stdio_ext.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #pragma once
 3 | 
 4 | #ifndef _MOCHIMAZUI_STDIO_EXT_
 5 | #define _MOCHIMAZUI_STDIO_EXT_
 6 | 
 7 | #if defined _WIN32 || defined _WIN64
 8 | #define MOCHIMAZUI_WINDOWS
 9 | #include <Windows.h>
10 | #undef min
11 | #undef max
12 | #endif
13 | 
14 | namespace Mochimazui {
15 | namespace stdext {
16 | 
17 | namespace stdext_private {
18 | #ifdef MOCHIMAZUI_WINDOWS
19 | inline HANDLE console_handle() { // stdout handle, looked up on first use and then reused
20 | 	static const HANDLE cached_handle = GetStdHandle(STD_OUTPUT_HANDLE);
21 | 	return cached_handle;
22 | }
23 | inline void set_console_text_attribute(WORD a) {
24 | 	// Applies attribute a to the console behind the stdout handle.
25 | 	// Attribute bits:
26 | 	//   0x0001 FOREGROUND_BLUE        0x0010 BACKGROUND_BLUE
27 | 	//   0x0002 FOREGROUND_GREEN       0x0020 BACKGROUND_GREEN
28 | 	//   0x0004 FOREGROUND_RED         0x0040 BACKGROUND_RED
29 | 	//   0x0008 FOREGROUND_INTENSITY   0x0080 BACKGROUND_INTENSITY
30 | 	// i.e. the low four bits colour the text and the next four the background.
31 | 	const HANDLE console = console_handle();
32 | 	SetConsoleTextAttribute(console, a);
33 | }
34 | #else
35 | // Without MOCHIMAZUI_WINDOWS the attribute is accepted and ignored.
36 | inline void set_console_text_attribute(uint32_t) {}
37 | #endif
38 | }
39 | 
40 | template<typename ...Ts>
41 | inline void color_printf(int text_color, int background_color, const char *fmt_str, Ts... args) {
42 | 	stdext_private::set_console_text_attribute(text_color | (background_color << 4)); // background sits four bits above the text colour
43 | 	printf(fmt_str, args...); // printed on stdout
44 | 	stdext_private::set_console_text_attribute(0x07); // afterwards: attribute 7 (blue|green|red text, no intensity)
45 | }
46 | 
47 | template<typename ...Ts>
48 | inline void error_printf(const char *fmt_str, Ts... args) {
49 | 	stdext_private::set_console_text_attribute(0xCF); // 0xCF = (12 << 4) | 15: background intensity|red, text all bits set
50 | 	fprintf(stderr, fmt_str, args...); // printed on stderr
51 | 	stdext_private::set_console_text_attribute(0x07); // afterwards: attribute 7
52 | }
53 | 
54 | template<typename ...Ts>
55 | inline void warning_printf(const char *fmt_str, Ts... args) {
56 | 	stdext_private::set_console_text_attribute(0x6F); // 0x6F = (6 << 4) | 15: background green|red, text all bits set
57 | 	fprintf(stderr, fmt_str, args...); // printed on stderr
58 | 	stdext_private::set_console_text_attribute(0x07); // afterwards: attribute 7
59 | }
60 | 
61 | template<typename ...Ts>
62 | inline void info_printf(const char *fmt_str, Ts... args) {
63 | 	stdext_private::set_console_text_attribute(0xAF); // 0xAF = (10 << 4) | 15: background intensity|green, text all bits set
64 | 	fprintf(stderr, fmt_str, args...); // printed on stderr, like the error and warning variants
65 | 	stdext_private::set_console_text_attribute(0x07); // afterwards: attribute 7
66 | }
67 | 
68 | }
69 | }
70 | 
71 | #endif


--------------------------------------------------------------------------------
/gpu-scanline/src/mochimazui/bitmap.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #include "bitmap.h"
  3 | 
  4 | #include <cstdint>
  5 | 
  6 | #if defined _WIN32 || defined _WIN64
  7 | #include <Windows.h>
  8 | #endif
  9 | 
 10 | namespace Mochimazui {
 11 | 
 12 | 	using std::string;
 13 | 
 14 | #if !defined(_WIN32) && !defined(_WIN64)
 15 | 	// Stand-ins for the Win32 types, using the exact widths the BMP file format requires.
 16 | 	typedef uint16_t WORD;
 17 | 	typedef uint32_t DWORD;
 18 | 	typedef int32_t LONG;
 19 | #pragma pack(push, 2) // the file header is 14 bytes on disk: no padding may follow bfType
 20 | 	typedef struct tagBITMAPFILEHEADER {
 21 | 		WORD  bfType;
 22 | 		DWORD bfSize;
 23 | 		WORD  bfReserved1;
 24 | 		WORD  bfReserved2;
 25 | 		DWORD bfOffBits;
 26 | 	} BITMAPFILEHEADER, *PBITMAPFILEHEADER;
 27 | 	typedef struct tagBITMAPINFOHEADER {
 28 | 		DWORD biSize;
 29 | 		LONG  biWidth;
 30 | 		LONG  biHeight;
 31 | 		WORD  biPlanes;
 32 | 		WORD  biBitCount;
 33 | 		DWORD biCompression;
 34 | 		DWORD biSizeImage;
 35 | 		LONG  biXPelsPerMeter;
 36 | 		LONG  biYPelsPerMeter;
 37 | 		DWORD biClrUsed;
 38 | 		DWORD biClrImportant;
 39 | 	} BITMAPINFOHEADER, *PBITMAPINFOHEADER;
 40 | #pragma pack(pop)
 41 | #endif
 42 | 
 43 | 	//void Bitmap::load(const std::string &fileName) {
 44 | 	//}
 45 | 
 46 | 	bool Bitmap::save(const std::string &fileName) {
 47 | 		// Writes the image as an uncompressed 32 bits-per-pixel BMP file.
 48 | 		// Returns false if the file cannot be opened or a write/close fails, true otherwise.
 49 | 
 50 | 		// Zero-initialised so no indeterminate bytes can end up in the file.
 51 | 		BITMAPFILEHEADER bfh = {};
 52 | 		BITMAPINFOHEADER bih = {};
 53 | 
 54 | 		// fill info header
 55 | 		bih.biSize = sizeof(BITMAPINFOHEADER);
 56 | 		bih.biWidth = _width;
 57 | 		bih.biHeight = _height;
 58 | 		bih.biPlanes = 1;
 59 | 
 60 | 		bih.biBitCount = 32;
 61 | 		bih.biCompression = 0; // 0 = uncompressed (BI_RGB)
 62 | 		bih.biSizeImage = _width*_height * 4; // 4 bytes per pixel
 63 | 
 64 | 		bih.biXPelsPerMeter = 1;
 65 | 		bih.biYPelsPerMeter = 1;
 66 | 		bih.biClrUsed = 0;
 67 | 		bih.biClrImportant = 0;
 68 | 
 69 | 		// fill file header
 70 | 		bfh.bfType = 0x4D42; // the bytes 'B','M' when stored little-endian
 71 | 		bfh.bfReserved1 = 0;
 72 | 		bfh.bfReserved2 = 0;
 73 | 		bfh.bfOffBits = sizeof(BITMAPFILEHEADER) + sizeof(BITMAPINFOHEADER);
 74 | 		bfh.bfSize = bfh.bfOffBits + bih.biSizeImage;
 75 | 
 76 | 		FILE* hFile = nullptr;
 77 | #ifdef _MSC_VER
 78 | 		fopen_s(&hFile, fileName.c_str(), "wb");
 79 | #else
 80 | 		hFile = fopen(fileName.c_str(), "wb");
 81 | #endif
 82 | 		if (!hFile) { return false; }
 83 | 
 84 | 		// Every write is checked so that a short write is reported to the caller.
 85 | 		bool ok = fwrite(&bfh, sizeof(BITMAPFILEHEADER), 1, hFile) == 1;
 86 | 		ok = ok && fwrite(&bih, sizeof(BITMAPINFOHEADER), 1, hFile) == 1;
 87 | 
 88 | 		// Pixels are written straight from _pixel, in memory order and without a temporary copy.
 89 | 		// NOTE(review): no vertical flip is applied; with a positive biHeight BMP readers treat
 90 | 		// the first stored row as the bottom row - confirm _pixel is stored bottom-up.
 91 | 		if (ok && bih.biSizeImage > 0) {
 92 | 			ok = fwrite(_pixel.data(), bih.biSizeImage, 1, hFile) == 1;
 93 | 		}
 94 | 
 95 | 		// fclose flushes buffered data, so its result is part of the outcome.
 96 | 		if (fclose(hFile) != 0) { ok = false; }
 97 | 
 98 | 		return ok;
 99 | 	}
100 | 
101 | }
102 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Efficient GPU Path Rendering Using Scanline Rasterization
 2 | 
 3 | SIGGRAPH Asia 2016 project.
 4 | 
 5 | Please download code together with dependency and test data from http://gaps-zju.org/pathrendering/
 6 | 
 7 | ## Pre-built Binary 
 8 | 
 9 | ./x64/release/gpu-scanline.exe. Tested on 64 bit Windows 10. Copy to ./working_directory to run. <br/>
10 | Built with Visual Studio 2013 and CUDA 7.5. <br/>
11 | Requires a NVIDIA graphics card with CUDA sm_50 and OpenGL 4.5 support.
12 | 
13 | Right button: move. <br/>
14 | Mouse wheel: scale. <br/>
15 | Left button: draw.
16 | 
17 | ## Build Dependency
18 | 
19 | * Visual Studio 2013/2015
20 | * CUDA 7.5/8.0, lower versions may work as well.
21 | * Thrust
22 | 
23 | ----
24 | Open source code and pre-built binaries included in /3rd
25 | 
26 | * [SDL 2.0.3](https://www.libsdl.org/) for basic window system and UI.
27 | * [Boost 1.60.0](http://www.boost.org/) for command line options.
28 | 
29 | Libraries are built on a 64-bit Windows 10 system with Visual Studio 2013.
30 | You may need to download or build these libraries on your own system.
31 | 
32 | ----
33 | Other included open source code
34 | 
35 | * [Modern GPU](https://nvlabs.github.io/moderngpu/) for segmented sort.
36 | * [glm 0.9.6.3](http://www.g-truc.net/) for vector and matrix.
37 | * [stb](https://github.com/nothings/stb) for image and font.
38 | * [rapidxml](https://github.com/dwd/rapidxml) for SVG parsing.
39 | 
40 | ----
41 | Code generator used
42 | 
43 | * [glLoadGen](https://bitbucket.org/alfonse/glloadgen/wiki/Home) for OpenGL functions.
44 | 
45 | ## Build
46 | 
47 | Open in Visual Studio. <br/>
48 | Check if "Properties -> CUDA C/C++ -> Device -> Code Generation" matches your device. <br/>
49 | Then build.
50 | 
51 | ## Run
52 | 
53 | * Start in Visual Studio: set "Debugging -> Working Directory" to $(SolutionDir)working_directory. <br/>
54 | * Start in explorer or command line: copy exe file to working_directory, or create shortcut. <br/>
55 | 
56 | The program loads ./vg_default.cfg by default. Run with --help or check cmd files in working directory for more detail.
57 | 
58 | ## Data
59 | 
60 | RVG files in ./input/rvg from [MPVG](http://w3.impa.br/~diego/projects/GanEtAl14/). <br/>
61 | Works on SVG files with a subset of features (see the paper for details).
62 | 
63 | ## Driver Issue
64 | 
65 | At the time of releasing this code, we were using driver 368.81, and everything ran well.
66 | 
67 | We found that the behavior of gl_SampleMask in GLSL changed as of NVIDIA driver version 368.22.
68 | Since the old behavior was inconsistent with the OpenGL standard, we assume it was a driver bug
69 | or the result of an incorrect graphics card configuration.
70 | Drivers earlier than this version may produce incorrect rendering results.
71 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/modern_gpu/include/moderngpu.cuh:
--------------------------------------------------------------------------------
 1 | /******************************************************************************
 2 |  * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
 3 |  * 
 4 |  * Redistribution and use in source and binary forms, with or without
 5 |  * modification, are permitted provided that the following conditions are met:
 6 |  *     * Redistributions of source code must retain the above copyright
 7 |  *       notice, this list of conditions and the following disclaimer.
 8 |  *     * Redistributions in binary form must reproduce the above copyright
 9 |  *       notice, this list of conditions and the following disclaimer in the
10 |  *       documentation and/or other materials provided with the distribution.
11 |  *     * Neither the name of the NVIDIA CORPORATION nor the
12 |  *       names of its contributors may be used to endorse or promote products
13 |  *       derived from this software without specific prior written permission.
14 |  * 
15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
18 |  * ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
19 |  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 |  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 |  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 |  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 |  *
26 |  ******************************************************************************/
27 | 
28 | /******************************************************************************
29 |  *
30 |  * Code and text by Sean Baxter, NVIDIA Research
31 |  * See http://nvlabs.github.io/moderngpu for repository and documentation.
32 |  *
33 |  ******************************************************************************/
34 | 
35 | #pragma once
36 | 
37 | #include "kernels/reduce.cuh"
38 | #include "kernels/scan.cuh"
39 | #include "kernels/bulkremove.cuh"
40 | #include "kernels/bulkinsert.cuh"
41 | #include "kernels/merge.cuh"
42 | #include "kernels/mergesort.cuh"
43 | #include "kernels/segmentedsort.cuh"
44 | #include "kernels/localitysort.cuh"
45 | #include "kernels/sortedsearch.cuh"
46 | #include "kernels/loadbalance.cuh"
47 | #include "kernels/intervalmove.cuh"
48 | #include "kernels/join.cuh"
49 | #include "kernels/sets.cuh"
50 | #include "kernels/segreducecsr.cuh"
51 | #include "kernels/reducebykey.cuh"
52 | #include "kernels/spmvcsr.cuh"
53 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/modern_gpu/include/mgpuenums.h:
--------------------------------------------------------------------------------
 1 | /******************************************************************************
 2 |  * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
 3 |  * 
 4 |  * Redistribution and use in source and binary forms, with or without
 5 |  * modification, are permitted provided that the following conditions are met:
 6 |  *     * Redistributions of source code must retain the above copyright
 7 |  *       notice, this list of conditions and the following disclaimer.
 8 |  *     * Redistributions in binary form must reproduce the above copyright
 9 |  *       notice, this list of conditions and the following disclaimer in the
10 |  *       documentation and/or other materials provided with the distribution.
11 |  *     * Neither the name of the NVIDIA CORPORATION nor the
12 |  *       names of its contributors may be used to endorse or promote products
13 |  *       derived from this software without specific prior written permission.
14 |  * 
15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
18 |  * ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
19 |  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 |  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 |  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 |  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 |  *
26 |  ******************************************************************************/
27 | 
28 | /******************************************************************************
29 |  *
30 |  * Code and text by Sean Baxter, NVIDIA Research
31 |  * See http://nvlabs.github.io/moderngpu for repository and documentation.
32 |  *
33 |  ******************************************************************************/
34 | 
35 | #pragma once 
36 | 
37 | namespace mgpu {
38 | // Option enums; every enumerator is numbered explicitly, counting up from 0.
39 | enum MgpuBounds {
40 | 	MgpuBoundsLower = 0,
41 | 	MgpuBoundsUpper = 1
42 | };
43 | 
44 | enum MgpuScanType {
45 | 	MgpuScanTypeExc = 0,
46 | 	MgpuScanTypeInc = 1
47 | };
48 | 
49 | enum MgpuSearchType {
50 | 	MgpuSearchTypeNone = 0,
51 | 	MgpuSearchTypeIndex = 1,
52 | 	MgpuSearchTypeMatch = 2,
53 | 	MgpuSearchTypeIndexMatch = 3
54 | };
55 | 
56 | enum MgpuJoinKind {
57 | 	MgpuJoinKindInner = 0,
58 | 	MgpuJoinKindLeft = 1,
59 | 	MgpuJoinKindRight = 2,
60 | 	MgpuJoinKindOuter = 3
61 | };
62 | 
63 | enum MgpuSetOp {
64 | 	MgpuSetOpIntersection = 0,
65 | 	MgpuSetOpUnion = 1,
66 | 	MgpuSetOpDiff = 2,
67 | 	MgpuSetOpSymDiff = 3
68 | };
69 | 
70 | } // namespace mgpu
71 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/mochimazui/camera_3d.h:
--------------------------------------------------------------------------------
  1 | #ifndef _MOCHIMAZUI_CAMERA_3D_H_
  2 | #define _MOCHIMAZUI_CAMERA_3D_H_
  3 | 
  4 | #include <glm/glm.hpp>
  5 | #include <glm/ext.hpp>
  6 | 
  7 | namespace Mochimazui {
  8 | 
  9 | 	class Camera3D {
 10 | 		// Accumulates view transforms in one 4x4 matrix; each operation is pre-multiplied onto _matrix.
 11 | 		friend class CameraController2D;
 12 | 
 13 | 	public:
 14 | 		Camera3D() : _eye(0.f), _center(0.f), _up(0.f), _scale(0.f), _translate(0.f), _matrix(1.0f) {
 15 | 		} // explicit values: GLM 0.9.9+ leaves default-constructed vectors and matrices uninitialised
 16 | 
 17 | 	public:
 18 | 		// Back to the identity transform (spelled out rather than relying on the default constructor).
 19 | 		void reset() {
 20 | 			_matrix = glm::mat4x4(1.0f);
 21 | 		}
 22 | 
 23 | 		// Translation by t.
 24 | 		void translate(const glm::vec3 &t) {
 25 | 			translate(t.x, t.y, t.z);
 26 | 		}
 27 | 
 28 | 		template <class T>
 29 | 		void translate(const T &x, const T &y, const T &z) {
 30 | 			_matrix = glm::mat4x4(
 31 | 				1, 0, 0, 0,
 32 | 				0, 1, 0, 0,
 33 | 				0, 0, 1, 0,
 34 | 				x, y, z, 1
 35 | 				) * _matrix;
 36 | 		}
 37 | 
 38 | 		// Scales x by sx and y by sy about the point cp; z is left unscaled.
 39 | 		template <class T>
 40 | 		void scale(const T &sx, const T &sy, const glm::vec3 &cp = glm::vec3(0.f, 0.f, 0.f)) {
 41 | 			translate(-cp);
 42 | 			_matrix = glm::mat4x4(
 43 | 				sx, 0, 0, 0,
 44 | 				0, sy, 0, 0,
 45 | 				0, 0, 1, 0,
 46 | 				0, 0, 0, 1
 47 | 				) * _matrix;
 48 | 			translate(cp);
 49 | 		}
 50 | 		// Same factor s for x and y, about cp.
 51 | 		template <class T>
 52 | 		void scale(const T &s, const glm::vec3 &cp = glm::vec3(0.f, 0.f, 0.f)) {
 53 | 			scale(s, s, cp);
 54 | 		}
 55 | 		// Uses s.x and s.y only; s.z is ignored.
 56 | 		void scale(const glm::vec3 &s, const glm::vec3 &cp = glm::vec3(0.f, 0.f, 0.f)) {
 57 | 			scale(s.x, s.y, cp);
 58 | 		}
 59 | 
 60 | 		// NOTE: not implemented - the body is empty, so calling it has no effect.
 61 | 		void rotate(const glm::vec3 &c, float a) {
 62 | 		}
 63 | 
 64 | 		// Declared here, defined out of line.
 65 | 		glm::mat4x4 matrix();
 66 | 
 67 | 		//
 68 | 		//void walk(const float delta, bool fixvr = true);
 69 | 		// Rotates by d about the axis parallel to x that passes through (0, y, 0).
 70 | 		void turn_y(const float d, const float y) {
 71 | 			_matrix =
 72 | 				glm::translate(glm::vec3(0, y, 0)) *
 73 | 				glm::rotate(d, glm::vec3(1, 0, 0)) *
 74 | 				glm::translate(glm::vec3(0, -y, 0)) * 
 75 | 				_matrix;
 76 | 		}
 77 | 
 78 | 		// Rotates by d about the axis `normal` passing through `center`.
 79 | 		void rotate_cn(const float d, const glm::vec3 &center, const glm::vec3 &normal) {
 80 | 			_matrix =
 81 | 				glm::translate(center) *
 82 | 				glm::rotate(d, normal) *
 83 | 				glm::translate(-center) *
 84 | 				_matrix;
 85 | 		}
 86 | 
 87 | 		//void pan(const float dx, const float dy);
 88 | 		// Moves _eye around _center; _center, _up and _matrix are not modified.
 89 | 		void rotate(const float dx, const float dy) {
 90 | 			// First turn the eye offset by -dx about the y axis ...
 91 | 			glm::vec4 dir = glm::vec4(_eye - _center, 1.f);
 92 | 			dir = glm::rotate(dir, -dx, glm::vec3(0, 1, 0));
 93 | 			//dir /= dir.w;
 94 | 			//_eye = _center + vec3(dir);
 95 | 
 96 | 			// ... then by -dy about cross(_up, offset). A zero-length axis would produce NaN, so that step is skipped.
 97 | 			auto cd = glm::cross(_up, glm::vec3(dir));
 98 | 			if (glm::dot(cd, cd) > 0.f) { dir = glm::rotate(dir, -dy, cd); }
 99 | 			dir /= dir.w;
100 | 			_eye = _center + glm::vec3(dir);
101 | 
102 | 			//_up = glm::cross(vec3(dir), cd);
103 | 		}
104 | 
105 | 	private:
106 | 		// _eye, _center and _up are only touched by rotate(dx, dy) within this header.
107 | 		glm::vec3 _eye;
108 | 		glm::vec3 _center;
109 | 		glm::vec3 _up;
110 | 
111 | 		glm::vec3 _scale;
112 | 		glm::vec3 _translate;
113 | 
114 | 		glm::mat4x4 _matrix;
115 | 	};
116 | 
117 | }
118 | 
119 | #endif


--------------------------------------------------------------------------------
/gpu-scanline/src/modern_gpu/include/util/mgpualloc.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #pragma once
 3 | 
 4 | #include "util.h"
 5 | #include <cuda.h>
 6 | 
 7 | namespace mgpu {
 8 | 
 9 | class CudaDevice;
10 | 
11 | class CudaContext;
12 | typedef intrusive_ptr<CudaContext> ContextPtr;
13 | 
14 | ////////////////////////////////////////////////////////////////////////////////
15 | // Customizable allocator.
16 | 
17 | // CudaAlloc is the interface class all allocator accesses. Users may derive
18 | // this, implement custom allocators, and set it to the device with 
19 | // CudaDevice::SetAllocator.
20 | 
21 | class CudaAlloc : public CudaBase {
22 | public:
23 | 	virtual cudaError_t Malloc(size_t size, void** p) = 0; // result pointer is delivered through p
24 | 	virtual bool Free(void* p) = 0;
25 | 	virtual void Clear() = 0;
26 | 	// Virtual destructor: derived allocators can be destroyed through a CudaAlloc pointer.
27 | 	virtual ~CudaAlloc() { }
28 | 	// Returns the device reference stored at construction.
29 | 	CudaDevice& Device() { return _device; }
30 | 	// Construction is restricted to derived classes; the device is kept by reference, not copied.
31 | protected:
32 | 	CudaAlloc(CudaDevice& device) : _device(device) { }
33 | 	CudaDevice& _device;
34 | };
35 | 
36 | // A concrete class allocator that simply calls cudaMalloc and cudaFree.
37 | class CudaAllocSimple : public CudaAlloc {
38 | public:
39 | 	CudaAllocSimple(CudaDevice& device) : CudaAlloc(device) { }
40 | 	// Malloc and Free are defined out of line; Clear has an empty body here.
41 | 	virtual cudaError_t Malloc(size_t size, void** p);
42 | 	virtual bool Free(void* p);
43 | 	virtual void Clear() { }
44 | 	virtual ~CudaAllocSimple() { }
45 | };
46 | 
47 | // A concrete class allocator that uses exponentially-spaced buckets and an LRU
48 | // to reuse allocations. This is the default allocator. It is shared between
49 | // all contexts on the device.
50 | class CudaAllocBuckets : public CudaAlloc {
51 | public:
52 | 	CudaAllocBuckets(CudaDevice& device);
53 | 	virtual ~CudaAllocBuckets();
54 | 	// Constructor, destructor, Malloc, Free and Clear are defined out of line.
55 | 	virtual cudaError_t Malloc(size_t size, void** p);
56 | 	virtual bool Free(void* p);
57 | 	virtual void Clear();
58 | 	// Read-only counters. NOTE(review): units are not visible in this header - presumably bytes; confirm.
59 | 	size_t Allocated() const { return _allocated; }
60 | 	size_t Committed() const { return _committed; }
61 | 	size_t Capacity() const { return _capacity; }
62 | 	// Defined out of line.
63 | 	bool SanityCheck() const;
64 | 	// Stores both limits and then calls Clear().
65 | 	void SetCapacity(size_t capacity, size_t maxObjectSize) {
66 | 		_capacity = capacity;
67 | 		_maxObjectSize = maxObjectSize;
68 | 		Clear();
69 | 	}
70 | 
71 | private:
72 | 	static const int NumBuckets = 84;
73 | 	static const size_t BucketSizes[NumBuckets]; // values are defined out of line
74 | 	// MemNode is forward-declared because the map typedefs below store MemList iterators.
75 | 	struct MemNode;
76 | 	typedef std::list<MemNode> MemList;
77 | 	typedef std::map<void*, MemList::iterator> AddressMap;
78 | 	typedef std::multimap<int, MemList::iterator> PriorityMap;
79 | 	// Each node keeps iterators to its own entries in the address and priority maps, plus a bucket index.
80 | 	struct MemNode {
81 | 		AddressMap::iterator address;
82 | 		PriorityMap::iterator priority;
83 | 		int bucket;
84 | 	};
85 | 
86 | 	void Compact(size_t extra);
87 | 	void FreeNode(MemList::iterator memIt);
88 | 	int LocateBucket(size_t size) const;
89 | 
90 | 	AddressMap _addressMap;
91 | 	PriorityMap _priorityMap;
92 | 	MemList _memLists[NumBuckets + 1]; // one list more than NumBuckets
93 | 	// NOTE(review): the extra list is presumably for requests beyond the largest bucket - confirm in the implementation.
94 | 	size_t _maxObjectSize, _capacity, _allocated, _committed;
95 | 	int _counter;
96 | };
97 | 
98 | } // namespace mgpu
99 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/cuda/cuda_cached_allocator.h:
--------------------------------------------------------------------------------
  1 | 
  2 | #ifndef _MOCHIMAZUI_THRUST_CACHED_ALLOCATOR_H_
  3 | #define _MOCHIMAZUI_THRUST_CACHED_ALLOCATOR_H_
  4 | 
  5 | #include <iostream>
  6 | #include <exception>
  7 | #include <stdexcept>
  8 | #include <map>
  9 | 
 10 | #include <cuda.h>
 11 | #include <cuda_runtime.h>
 12 | //#include <thrust/system/cuda/vector.h>
 13 | //#include <thrust/system/cuda/execution_policy.h>
 14 | //#include <thrust/host_vector.h>
 15 | //#include <thrust/pair.h>
 16 | 
 17 | namespace Mochimazui {
 18 | 
 19 | class cuda_cached_allocator_bad_alloc : public std::runtime_error { // runtime_error subtype; only forwards the message
 20 | public:
 21 | 	cuda_cached_allocator_bad_alloc(const std::string &msg) : std::runtime_error(msg) {}
 22 | 	cuda_cached_allocator_bad_alloc(const char *msg) : std::runtime_error(msg) {}
 23 | };
 24 | 
 25 | // Example by Nathan Bell and Jared Hoberock
 26 | // (modified by Mihail Ivakhnenko)
 27 | //
 28 | // This example demonstrates how to intercept calls to get_temporary_buffer
 29 | // and return_temporary_buffer to control how Thrust allocates temporary storage
 30 | // during algorithms such as thrust::reduce. The idea will be to create a simple
 31 | // cache of allocations to search when temporary storage is requested. If a hit
 32 | // is found in the cache, we quickly return the cached allocation instead of
 33 | // resorting to the more expensive thrust::cuda::malloc.
 34 | //
 35 | // Note: this implementation cached_allocator is not thread-safe. If multiple
 36 | // (host) threads use the same cached_allocator then they should gain exclusive
 37 | // access to the allocator before accessing its methods.
 38 | 
 39 | // cached_allocator: a simple allocator for caching allocation requests
 40 | class cuda_cached_allocator {
 41 | public:
 42 | 	// Linear (bump) allocator over a single cudaMalloc'ed buffer: allocate() hands out consecutive
 43 | 	// slices, deallocate() does nothing, and reset() makes the whole buffer available again.
 44 | 	typedef char value_type;
 45 | 
 46 | 	cuda_cached_allocator() {}
 47 | 	~cuda_cached_allocator() { free_all(); }
 48 | 
 49 | 	// The buffer is owned through a raw pointer, so a copy would free it twice.
 50 | 	cuda_cached_allocator(const cuda_cached_allocator &) = delete;
 51 | 	cuda_cached_allocator &operator=(const cuda_cached_allocator &) = delete;
 52 | 
 53 | public:
 54 | 	// Replaces the buffer with a new one of s bytes and restarts at offset 0; earlier pointers become invalid.
 55 | 	// Throws cuda_cached_allocator_bad_alloc (a std::runtime_error) if cudaMalloc fails; the allocator is then empty.
 56 | 	void reserver(size_t s) {
 57 | 		free_all();
 58 | 		_reservedSize = 0;
 59 | 		_unallocatedPtr = 0;
 60 | 		if (cudaMalloc(&_ptr, s) != cudaSuccess) {
 61 | 			_ptr = nullptr;
 62 | 			throw cuda_cached_allocator_bad_alloc("cuda_cached_allocator: cudaMalloc failed.");
 63 | 		}
 64 | 		_reservedSize = s;
 65 | 	}
 66 | 
 67 | 	size_t reserved() { return _reservedSize; }
 68 | 
 69 | 	void reset() { _unallocatedPtr = 0; } // nothing is freed or cleared; the next allocate() starts at offset 0
 70 | 
 71 | 	// Zeroes the whole buffer with cudaMemsetAsync; does nothing when no buffer is reserved.
 72 | 	void fill_zero() {
 73 | 		if (_ptr) { cudaMemsetAsync(_ptr, 0, _reservedSize); }
 74 | 	}
 75 | 
 76 | 	size_t allocated() { return _unallocatedPtr; } // offset of the next free byte, alignment padding included
 77 | 
 78 | 	// Returns a slice of num_bytes bytes starting at the current offset.
 79 | 	// Throws cuda_cached_allocator_bad_alloc if num_bytes is negative or exceeds the remaining space.
 80 | 	char* allocate(std::ptrdiff_t num_bytes) {
 81 | 		if (num_bytes < 0 || _unallocatedPtr > _reservedSize ||
 82 | 			static_cast<size_t>(num_bytes) > _reservedSize - _unallocatedPtr) {
 83 | 			printf("cuda_cached_allocator: reserved memory exhausted.");
 84 | 			throw cuda_cached_allocator_bad_alloc("cuda_cached_allocator: reserved memory exhausted.");
 85 | 		}
 86 | 		char *a = _ptr + _unallocatedPtr;
 87 | 		// Advance past the slice and round up to a multiple of 32 bytes (256 bits).
 88 | 		_unallocatedPtr = (_unallocatedPtr + static_cast<size_t>(num_bytes) + 0x1F) & ~static_cast<size_t>(0x1F);
 89 | 		return a;
 90 | 	}
 91 | 
 92 | 	// Typed wrappers: room for num objects of type T, obtained from allocate(num * sizeof(T)).
 93 | 	template<class T>
 94 | 	T *allocate(size_t num) {
 95 | 		return (T*)this->allocate(num *sizeof(T));
 96 | 	}
 97 | 
 98 | 	template<class T>
 99 | 	void allocate(T **ptr, size_t num) {
100 | 		*ptr = (T*)this->allocate(num *sizeof(T));
101 | 	}
102 | 
103 | 	void deallocate(char* ptr, size_t n) {} // individual slices are never returned; see reset()
104 | 
105 | private:
106 | 	size_t _reservedSize = 0;   // size of the reserved buffer in bytes
107 | 	size_t _unallocatedPtr = 0; // byte offset of the next free position
108 | 	char *_ptr = nullptr;       // start of the buffer obtained from cudaMalloc
109 | 
110 | private:
111 | 	void free_all() {
112 | 		if (_ptr) { cudaFree(_ptr); _ptr = nullptr; }
113 | 	}
114 | 
115 | };
116 | 
117 | extern cuda_cached_allocator g_thrustCachedAllocator;
118 | extern cuda_cached_allocator &g_alloc;
119 | 
120 | }
121 | 
122 | #endif
123 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/timer.h:
--------------------------------------------------------------------------------
  1 | 
  2 | #ifndef _SVDAG_CUDA_HEADER_H_
  3 | #define _SVDAG_CUDA_HEADER_H_
  4 | 
  5 | #pragma warning( push ) 
  6 | #pragma warning( disable : 4819 )
  7 | #include <cuda.h>
  8 | #include <cuda_runtime.h>
  9 | #include <cuda_profiler_api.h>
 10 | #include <cuda_gl_interop.h>
 11 | #pragma warning( pop )
 12 | 
 13 | #include <cstdio>
 14 | #include <ctime>
 15 | 
 16 | #include <chrono>
 17 | #include <algorithm>
 18 | #include <iostream>
 19 | 
 20 | #include <mochimazui/3rd/helper_cuda.h>
 21 | 
 22 | namespace Mochimazui {
 23 | class Timer {
 24 | 	// Stopwatch that accumulates the time between start()/resume() and pause()/end().
 25 | public:
 26 | 	void start() { // clears the accumulated time and begins a new interval
 27 | 		_totalTime = std::chrono::system_clock::duration::zero();
 28 | 		resume();
 29 | 	}
 30 | 
 31 | 	void pause() { // adds the interval since the last start()/resume() to the total
 32 | 		const auto now = std::chrono::steady_clock::now();
 33 | 		_totalTime += std::chrono::duration_cast<std::chrono::system_clock::duration>(now - _start);
 34 | 	}
 35 | 
 36 | 	void resume() {
 37 | 		_start = std::chrono::steady_clock::now();
 38 | 	}
 39 | 
 40 | 	void end() {
 41 | 		pause();
 42 | 	}
 43 | 
 44 | public:
 45 | 	// Message-printing variants: same timing behaviour plus a status line on stdout.
 46 | 	void start(const std::string &msg) {
 47 | 		_msg = msg;
 48 | 		printf("%s START.\n", msg.c_str());
 49 | 		start();
 50 | 	}
 51 | 
 52 | 	void pause(const std::string &msg) {
 53 | 		pause();
 54 | 		printf("%s PAUSE.\n", msg.c_str());
 55 | 	}
 56 | 
 57 | 	void resume(const std::string &msg) {
 58 | 		resume();
 59 | 		printf("%s RESUME.\n", msg.c_str());
 60 | 	}
 61 | 
 62 | 	void end(const std::string& msg) { // an empty msg falls back to the label given to start(msg)
 63 | 		end();
 64 | 		const std::string &omsg = msg.empty() ? _msg : msg;
 65 | 		std::cout << omsg << " END" << std::endl;
 66 | 		std::cout << omsg << " duration = "
 67 | 			<< std::chrono::duration_cast<std::chrono::milliseconds>(_totalTime).count()
 68 | 			<< "ms." << std::endl;
 69 | 	}
 70 | 
 71 | public:
 72 | 	std::chrono::system_clock::duration time() { // accumulated total; the return type is kept for existing callers
 73 | 		return _totalTime;
 74 | 	}
 75 | 
 76 | 	float time_in_ms() { // accumulated total in milliseconds, with microsecond resolution
 77 | 		return std::chrono::duration_cast<std::chrono::microseconds>(_totalTime).count() / 1000.f;
 78 | 	}
 79 | 
 80 | private:
 81 | 	// steady_clock is monotonic, so a wall-clock adjustment cannot distort or negate an interval.
 82 | 	std::string _msg;
 83 | 	std::chrono::steady_clock::time_point _start = std::chrono::steady_clock::now();
 84 | 	std::chrono::system_clock::duration _totalTime = std::chrono::system_clock::duration::zero();
 85 | };
 86 | 
 87 | 
 88 | class CUDATimer {
 89 | 	// GPU stopwatch built on a pair of CUDA events; nothing in this class destroys the events.
 90 | public:
 91 | 	CUDATimer() {
 92 | 		cudaEventCreate(&_start);
 93 | 		cudaEventCreate(&_stop);
 94 | 	}
 95 | 
 96 | 	void start() { cudaEventRecord(_start); } // records the start event
 97 | 
 98 | 	// Same as start(), additionally remembering msg as the label for a later stop(msg).
 99 | 	void start(const std::string &msg) {
100 | 		_msg = msg;
101 | 		cudaEventRecord(_start);
102 | 	}
103 | 	// Records the stop event, waits for it to complete and returns the elapsed time between the two events.
104 | 	float stop() {
105 | 		_ms = 0;
106 | 		cudaEventRecord(_stop);
107 | 		cudaEventSynchronize(_stop);
108 | 		cudaEventElapsedTime(&_ms, _start, _stop);
109 | 		return _ms;
110 | 	}
111 | 
112 | 	// stop() plus a two-line report on std::cout; a non-empty msg replaces the stored label.
113 | 	float stop(const std::string &msg) {
114 | 		const float elapsed = stop();
115 | 		if (!msg.empty()) { _msg = msg; }
116 | 		std::cout << _msg << " END." << std::endl;
117 | 		std::cout << _msg << " duration = " << _ms << " ms." << std::endl;
118 | 		return elapsed;
119 | 	}
120 | 
121 | private:
122 | 	std::string _msg;
123 | 	cudaEvent_t _start, _stop;
124 | 	float _ms;
125 | };
126 | }
127 | 
128 | static Mochimazui::CUDATimer g_timer_zero; // internal linkage: each translation unit including this header gets its own instance
129 | static int g_timer_enabled = 0; // set to 1 by timer_reset() and back to 0 by timer_done()
130 | static inline void timer_reset() {
131 | 	// Turns timer_print() output on and starts measuring from here.
132 | 	g_timer_enabled = 1;
133 | 	g_timer_zero.start();
134 | }
135 | static inline void timer_print(const char* msg, const char* file, int line) {
136 | 	// Prints the GPU time measured since the previous timer_reset()/timer_print(), labelled msg,
137 | 	// then restarts the measurement. Does nothing unless timing was enabled by timer_reset().
138 | 	if (g_timer_enabled) {
139 | 		cudaDeviceSynchronize(); // wait for outstanding GPU work before checking and timing
140 | 		// NOTE(review): __getLastCudaError presumably comes from helper_cuda.h and reports a pending error - confirm.
141 | 		__getLastCudaError(msg, file, line);
142 | 		const float elapsed_ms = g_timer_zero.stop();
143 | 		printf(">>> %s %5.2f ms\n", msg, elapsed_ms);
144 | 		g_timer_zero.start();
145 | 	}
146 | }
147 | static inline void timer_done() { // timer_print() returns immediately until timer_reset() is called again
148 | 	g_timer_enabled = 0;
149 | }
150 | 
151 | #endif


--------------------------------------------------------------------------------
/gpu-scanline/src/mochimazui/config.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #include <mochimazui/config.h>
  3 | #include <mochimazui/string.h>
  4 | #include <mochimazui/file.h>
  5 | 
  6 | namespace Mochimazui {
  7 | 
  8 | using namespace ConfigPrivate;
  9 | 
 10 | using std::vector;
 11 | using std::string;
 12 | using std::basic_string;
 13 | using std::runtime_error;
 14 | 
 15 | namespace program_options = boost::program_options;
 16 | 
 17 | // -------- -------- -------- -------- -------- -------- -------- --------
 18 | void Config::addValue(const std::string &iv) {
 19 | 	// iv must split on ':' into "name:type" or "name:type:value"; anything else is rejected.
 20 | 	stdext::string text = iv;
 21 | 	auto fields = text.split(':');
 22 | 	const bool has_value = fields.size() == 3;
 23 | 	if (!has_value && fields.size() != 2) {
 24 | 		throw std::runtime_error("Config::addValue: invalid ConfigValue format " + iv);
 25 | 	}
 26 | 	ConfigValue entry;
 27 | 	entry.setName(fields[0]);
 28 | 	entry.setType(fields[1]);
 29 | 	if (has_value) { entry.setValue(fields[2].c_str()); }
 30 | 	_value_map[fields[0]] = entry; // stored under its name
 31 | }
 32 | 
 33 | void Config::addValue(const std::vector<std::string> &vv) {
 34 | 	for (const auto &v : vv) {
 35 | 		addValue(v);
 36 | 	}
 37 | }
 38 | 
 39 | // -------- -------- -------- -------- -------- -------- -------- --------
 40 | void Config::addOption(const std::string &o) {
 41 | 
 42 | 	ConfigOption co;
 43 | 
 44 | 	auto i_space = o.find(' ');
 45 | 
 46 | 	co.name = o.substr(0, i_space);
 47 | 
 48 | 	auto i_lb = o.find('{');
 49 | 	auto i_rb = o.find('}');
 50 | 
 51 | 	// help info, ignore.
 52 | 
 53 | 	stdext::string values = o.substr(i_rb + 2);
 54 | 	auto value_list = values.split('#');
 55 | 
 56 | 	if (value_list[0] == "0") { co.type = ConfigOption_Void; }
 57 | 	else if (value_list[0] == "1") { co.type = ConfigOption_Value; }
 58 | 	else if (value_list[0] == "*") { co.type = ConfigOption_Array; }
 59 | 	else { throw std::runtime_error("Config::addOption: invalid option format."); }
 60 | 
 61 | 	for (int i = 1; i < value_list.size(); ++i) {
 62 | 		auto l = value_list[i].split(':');
 63 | 		if (l.size() != 1 && l.size() != 2) {
 64 | 			throw std::runtime_error("Config::addOption: invalid option format.");
 65 | 		}
 66 | 		ConfigOptionSetValue sv;
 67 | 		sv.name = l[0];
 68 | 		if (l.size() == 2) { sv.value = l[1]; }
 69 | 		co.values.push_back(sv);
 70 | 	}
 71 | 
 72 | 	_option_map["-" + co.name] = co;
 73 | }
 74 | 
 75 | void Config::addOption(const std::vector<std::string> &vo) {
 76 | 	for (const auto &o : vo) {
 77 | 		addOption(o);
 78 | 	}
 79 | }
 80 | 
 81 | // -------- -------- -------- -------- -------- -------- -------- --------
 82 | void help() {
 83 | }
 84 | 
 85 | // -------- -------- -------- -------- -------- -------- -------- --------
 86 | void Config::load(const std::string &file) {
 87 | 
 88 | 	vector<string> args;
 89 | 	command_line_file_to_args(file, args);
 90 | 
 91 | 	vector<const char*> args_ptr;
 92 | 	for (string &s : args) { args_ptr.push_back(s.data()); }
 93 | 
 94 | 	parse((int)args_ptr.size(), args_ptr.data());
 95 | }
 96 | 
 97 | void Config::parse(int argc, const char *argv[]) {
 98 | 
 99 | 	for (int i = 1; i < argc;) {
100 | 		std::string arg = argv[i];
101 | 		auto ioption = _option_map.find(arg);
102 | 		if (ioption == _option_map.end()) {
103 | 			throw std::runtime_error("Config::parse: unsupported option " + arg);
104 | 		}
105 | 		++i;
106 | 
107 | 		const auto &co = ioption->second;
108 | 
109 | 		std::string value;
110 | 		if (co.type != ConfigOption_Void) { 
111 | 			if (i >= argc) { throw std::runtime_error(arg + " requires more input."); }
112 | 			value = argv[i];
113 | 			++i;
114 | 		}
115 | 
116 | 		for (const auto &v : co.values) {
117 | 			auto iv = _value_map.find(v.name);
118 | 			if (iv == _value_map.end()) {
119 | 				ConfigValue new_value;
120 | 				new_value.setName(v.name);
121 | 				new_value.setType("any");
122 | 				new_value.setValue(v.value.empty() ? value : v.value);
123 | 			}
124 | 			else {
125 | 				iv->second.setValue(v.value.empty() ? value : v.value);
126 | 			}
127 | 		}
128 | 
129 | 	}
130 | 
131 | }
132 | 
133 | 
134 | }
135 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/modern_gpu/include/kernels_ext/search_ext.cuh:
--------------------------------------------------------------------------------
 1 | /******************************************************************************
 2 | * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
 3 | *
 4 | * Redistribution and use in source and binary forms, with or without
 5 | * modification, are permitted provided that the following conditions are met:
 6 | *     * Redistributions of source code must retain the above copyright
 7 | *       notice, this list of conditions and the following disclaimer.
 8 | *     * Redistributions in binary form must reproduce the above copyright
 9 | *       notice, this list of conditions and the following disclaimer in the
10 | *       documentation and/or other materials provided with the distribution.
11 | *     * Neither the name of the NVIDIA CORPORATION nor the
12 | *       names of its contributors may be used to endorse or promote products
13 | *       derived from this software without specific prior written permission.
14 | *
15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 | * ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
19 | * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 | *
26 | ******************************************************************************/
27 | 
28 | /******************************************************************************
29 | *
30 | * Code and text by Sean Baxter, NVIDIA Research
31 | * See http://nvlabs.github.io/moderngpu for repository and documentation.
32 | *
33 | ******************************************************************************/
34 | 
35 | #pragma once
36 | 
37 | #include "../kernels/search.cuh"
38 | 
39 | #include "../../../cuda/cuda_cached_allocator.h"
40 | 
41 | namespace mgpu_ext {
42 | 
43 | 	using Mochimazui::g_alloc;
44 | 
45 | 	using namespace mgpu;
46 | 
47 | 	template<MgpuBounds Bounds, typename It1, typename Comp>
48 | 	int *BinarySearchPartitions(int count, It1 data_global, int numItems,
49 | 		int nv, Comp comp) {
50 | 
51 | 		const int NT = 64;
52 | 		int numBlocks = MGPU_DIV_UP(count, nv);
53 | 		int numPartitionBlocks = MGPU_DIV_UP(numBlocks + 1, NT);
54 | 		//MGPU_MEM(int) partitionsDevice = context.Malloc<int>(numBlocks + 1);
55 | 		int *partitionsDevice = (int*)g_alloc.allocate(sizeof(int) * (numBlocks + 1));
56 | 
57 | 		KernelBinarySearch<NT, Bounds>
58 | 			<< <numPartitionBlocks, NT >> >(count, data_global,
59 | 			numItems, nv, partitionsDevice, numBlocks + 1, comp);
60 | 		MGPU_SYNC_CHECK("KernelBinarySearch");
61 | 
62 | 		return partitionsDevice;
63 | 	}
64 | 
65 | 	template<MgpuBounds Bounds, typename It1, typename It2, typename Comp>
66 | 	int *MergePathPartitions_ext(It1 a_global, int aCount, It2 b_global,
67 | 		int bCount, int nv, int coop, Comp comp) {
68 | 
69 | 		const int NT = 64;
70 | 		int numPartitions = MGPU_DIV_UP(aCount + bCount, nv);
71 | 		int numPartitionBlocks = MGPU_DIV_UP(numPartitions + 1, NT);
72 | 
73 | 		//MGPU_MEM(int) partitionsDevice = context.Malloc<int>(numPartitions + 1);
74 | 		int *partitionsDevice = g_alloc.allocate<int>(numPartitions + 1);
75 | 
76 | 		KernelMergePartition<NT, Bounds>
77 | 			<< <numPartitionBlocks, NT>> >(a_global, aCount,
78 | 			b_global, bCount, nv, coop, partitionsDevice, numPartitions + 1,
79 | 			comp);
80 | 		MGPU_SYNC_CHECK("KernelMergePartition");
81 | 
82 | 		return partitionsDevice;
83 | 	}
84 | 
85 | }
86 | 
87 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/rapidxml_utils.hpp:
--------------------------------------------------------------------------------
  1 | #ifndef RAPIDXML_UTILS_HPP_INCLUDED
  2 | #define RAPIDXML_UTILS_HPP_INCLUDED
  3 | 
  4 | // Copyright (C) 2006, 2009 Marcin Kalicinski
  5 | // Version 1.13
  6 | // Revision $DateTime: 2009/05/13 01:46:17 $
  7 | //! \file rapidxml_utils.hpp This file contains high-level rapidxml utilities that can be useful
  8 | //! in certain simple scenarios. They should probably not be used if maximizing performance is the main objective.
  9 | 
 10 | #include "rapidxml.hpp"
 11 | #include <vector>
 12 | #include <string>
 13 | #include <fstream>
 14 | #include <stdexcept>
 15 | 
 16 | namespace rapidxml
 17 | {
 18 | 
 19 |     //! Represents data loaded from a file
 20 |     template<class Ch = char>
 21 |     class file
 22 |     {
 23 |         
 24 |     public:
 25 |         
 26 |         //! Loads file into the memory. Data will be automatically destroyed by the destructor.
 27 |         //! \param filename Filename to load.
 28 |         file(const char *filename)
 29 |         {
 30 |             using namespace std;
 31 | 
 32 |             // Open stream
 33 |             basic_ifstream<Ch> stream(filename, ios::binary);
 34 |             if (!stream)
 35 |                 throw runtime_error(string("cannot open file ") + filename);
 36 |             stream.unsetf(ios::skipws);
 37 |             
 38 |             // Determine stream size
 39 |             stream.seekg(0, ios::end);
 40 |             size_t size = stream.tellg();
 41 |             stream.seekg(0);   
 42 |             
 43 |             // Load data and add terminating 0
 44 |             m_data.resize(size + 1);
 45 |             stream.read(&m_data.front(), static_cast<streamsize>(size));
 46 |             m_data[size] = 0;
 47 |         }
 48 | 
 49 |         //! Loads file into the memory. Data will be automatically destroyed by the destructor
 50 |         //! \param stream Stream to load from
 51 |         file(std::basic_istream<Ch> &stream)
 52 |         {
 53 |             using namespace std;
 54 | 
 55 |             // Load data and add terminating 0
 56 |             stream.unsetf(ios::skipws);
 57 |             m_data.assign(istreambuf_iterator<Ch>(stream), istreambuf_iterator<Ch>());
 58 |             if (stream.fail() || stream.bad())
 59 |                 throw runtime_error("error reading stream");
 60 |             m_data.push_back(0);
 61 |         }
 62 |         
 63 |         //! Gets file data.
 64 |         //! \return Pointer to data of file.
 65 |         Ch *data()
 66 |         {
 67 |             return &m_data.front();
 68 |         }
 69 | 
 70 |         //! Gets file data.
 71 |         //! \return Pointer to data of file.
 72 |         const Ch *data() const
 73 |         {
 74 |             return &m_data.front();
 75 |         }
 76 | 
 77 |         //! Gets file data size.
 78 |         //! \return Size of file data, in characters.
 79 |         std::size_t size() const
 80 |         {
 81 |             return m_data.size();
 82 |         }
 83 | 
 84 |     private:
 85 | 
 86 |         std::vector<Ch> m_data;   // File data
 87 | 
 88 |     };
 89 | 
 90 |     //! Counts children of node. Time complexity is O(n).
 91 |     //! \return Number of children of node
 92 |     template<class Ch>
 93 |     inline std::size_t count_children(xml_node<Ch> *node)
 94 |     {
 95 |         xml_node<Ch> *child = node->first_node();
 96 |         std::size_t count = 0;
 97 |         while (child)
 98 |         {
 99 |             ++count;
100 |             child = child->next_sibling();
101 |         }
102 |         return count;
103 |     }
104 | 
105 |     //! Counts attributes of node. Time complexity is O(n).
106 |     //! \return Number of attributes of node
107 |     template<class Ch>
108 |     inline std::size_t count_attributes(xml_node<Ch> *node)
109 |     {
110 |         xml_attribute<Ch> *attr = node->first_attribute();
111 |         std::size_t count = 0;
112 |         while (attr)
113 |         {
114 |             ++count;
115 |             attr = attr->next_attribute();
116 |         }
117 |         return count;
118 |     }
119 | 
120 | }
121 | 
122 | #endif
123 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/modern_gpu/include/kernels/loadbalance.cuh:
--------------------------------------------------------------------------------
 1 | /******************************************************************************
 2 |  * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
 3 |  * 
 4 |  * Redistribution and use in source and binary forms, with or without
 5 |  * modification, are permitted provided that the following conditions are met:
 6 |  *     * Redistributions of source code must retain the above copyright
 7 |  *       notice, this list of conditions and the following disclaimer.
 8 |  *     * Redistributions in binary form must reproduce the above copyright
 9 |  *       notice, this list of conditions and the following disclaimer in the
10 |  *       documentation and/or other materials provided with the distribution.
11 |  *     * Neither the name of the NVIDIA CORPORATION nor the
12 |  *       names of its contributors may be used to endorse or promote products
13 |  *       derived from this software without specific prior written permission.
14 |  * 
15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
18 |  * ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
19 |  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 |  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 |  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 |  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 |  *
26 |  ******************************************************************************/
27 | 
28 | /******************************************************************************
29 |  *
30 |  * Code and text by Sean Baxter, NVIDIA Research
31 |  * See http://nvlabs.github.io/moderngpu for repository and documentation.
32 |  *
33 |  ******************************************************************************/
34 | 
35 | #pragma once
36 | 
37 | #include "../mgpuhost.cuh"
38 | #include "../device/ctaloadbalance.cuh"
39 | #include "../kernels/search.cuh"
40 | 
41 | namespace mgpu {
42 | 
43 | ////////////////////////////////////////////////////////////////////////////////
44 | // KernelLoadBalance
45 | 
// Kernel: each block runs CTALoadBalance for its tile and copies the
// resulting indices from shared memory to indices_global.
//
// Launch layout: one block per tile, Params::NT threads per block (NT/VT come
// from the Tuning launch box for the compiled architecture).
// Shared memory: NT * (VT + 1) ints, statically allocated.
//
//   aCount, b_global, bCount  forwarded to CTALoadBalance
//   mp_global                 partition table forwarded to CTALoadBalance
//   indices_global            output; this block writes range.y - range.x
//                             entries starting at offset range.x
template<typename Tuning, typename InputIt>
MGPU_LAUNCH_BOUNDS void KernelLoadBalance(int aCount, InputIt b_global,
	int bCount, const int* mp_global, int* indices_global) {

	typedef MGPU_LAUNCH_PARAMS Params;
	const int NT = Params::NT;
	const int VT = Params::VT;
	__shared__ int indices_shared[NT * (VT + 1)];

	const int tid = threadIdx.x;
	const int block = blockIdx.x;

	const int4 range = CTALoadBalance<NT, VT>(aCount, b_global, bCount, block,
		tid, mp_global, indices_shared, false);

	// range.x/range.y delimit the slice of the output owned by this block.
	DeviceSharedToGlobal<NT, VT>(range.y - range.x, indices_shared, tid,
		indices_global + range.x, false);
}
64 | 
65 | ////////////////////////////////////////////////////////////////////////////////
66 | // LoadBalanceSearch
67 | 
// Host entry point: computes merge-path partitions and launches
// KernelLoadBalance to fill indices_global.
//
//   aCount          forwarded to the partitioning step and the kernel
//   b_global        forwarded to the partitioning step and the kernel
//   bCount          forwarded to the partitioning step and the kernel
//   indices_global  device output buffer written by the kernel
//   context         supplies the launch parameters and the stream
//
// Launch layout: ceil((aCount + bCount) / NV) blocks of launch.x threads on
// context.Stream(), where NV = launch.x * launch.y.
// When aCount + bCount is not positive there is nothing to do and the
// function returns without allocating or launching: a launch with a
// zero-sized grid is an invalid configuration and would leave a CUDA error
// pending for the next error check.
template<typename InputIt>
MGPU_HOST void LoadBalanceSearch(int aCount, InputIt b_global, int bCount,
	int* indices_global, CudaContext& context) {

	// Empty problem: skip the partition allocation and the kernel launch.
	if (aCount + bCount <= 0) {
		return;
	}

	const int NT = 128;
	const int VT = 7;
	typedef LaunchBoxVT<NT, VT> Tuning;
	int2 launch = Tuning::GetLaunchParams(context);
	const int NV = launch.x * launch.y;
	  
	MGPU_MEM(int) partitionsDevice = MergePathPartitions<MgpuBoundsUpper>(
		mgpu::counting_iterator<int>(0), aCount, b_global, bCount, NV, 0,
		mgpu::less<int>(), context);

	int numBlocks = MGPU_DIV_UP(aCount + bCount, NV);
	KernelLoadBalance<Tuning><<<numBlocks, launch.x, 0, context.Stream()>>>(
		aCount, b_global, bCount, partitionsDevice->get(), indices_global);
	MGPU_SYNC_CHECK("KernelLoadBalance");
}
87 | 
88 | } // namespace mgpu
89 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/vg_config.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #define _CRT_SECURE_NO_WARNINGS
  3 | 
  4 | #include "vg_config.h"
  5 | 
  6 | #include <cstring>
  7 | 
  8 | #include <iostream>
  9 | #include <fstream>
 10 | #include <sstream>
 11 | #include <algorithm>
 12 | #include <functional>
 13 | 
 14 | #include <mochimazui/file.h>
 15 | #include <mochimazui/string.h>
 16 | #include <mochimazui/stdio_ext.h>
 17 | 
 18 | // -------- -------- -------- -------- -------- -------- -------- --------
 19 | namespace Mochimazui {
 20 | 
 21 | namespace PRIVATE {
 22 | boost::program_options::variables_map g_config_variables;
 23 | }
 24 | 
 25 | int init_config(int argc, char *argv[]) {
 26 | 
 27 | 	namespace po = boost::program_options;
 28 | 
 29 | 	using PRIVATE::g_config_variables;
 30 | 	using Mochimazui::parse_command_line_file;
 31 | 
 32 | 	po::options_description general_options("General options");
 33 | 	general_options.add_options()
 34 | 		("help", "print help")
 35 | 
 36 | 		("verbose", po::bool_switch(), "verbose output to console")
 37 | 		("gl-debug", po::bool_switch(), "enable GL_DEBUG")
 38 | 		("draw-curve", po::bool_switch(), "draw curve")
 39 | 		("show-fps", po::bool_switch(), "show fps")
 40 | 
 41 | 		("benchmark", po::bool_switch(), "benchmark")
 42 | 		("step-timing", po::bool_switch(), "step timing")
 43 | 		("attach-timing-to", po::value<std::string>()->default_value(""), "")
 44 | 
 45 | 		("merge-path", po::bool_switch(), "")
 46 | 		("minimal-ui", po::bool_switch(), "produce help message")
 47 | 
 48 | 		("v-flip", po::bool_switch(), "")
 49 | 
 50 | 		("count-pixel", po::bool_switch(), "")
 51 | 		("attach-pixel-count-to", po::value<std::string>()->default_value(""), "")
 52 | 
 53 | 		("animation", po::bool_switch(), "run chrod animation")
 54 | 		;
 55 | 
 56 | 	po::options_description io_options("Input/output options");
 57 | 	io_options.add_options()
 58 | 		("file-index", po::value<std::string>()->multitoken(), "file index")
 59 | 
 60 | 		("input-name", po::value<std::string>()->default_value(""), "")
 61 | 		("input-file,i", po::value<std::string>(), "input file")
 62 | 
 63 | 		("input-width", po::value<int>()->default_value(0), "")
 64 | 		("input-height", po::value<int>()->default_value(0), "")
 65 | 
 66 | 		("window-width", po::value<int>()->default_value(1200), "")
 67 | 		("window-height", po::value<int>()->default_value(1024), "")
 68 | 
 69 | 		("fit-to-vg", po::bool_switch(), "")
 70 | 		("fit-to-window", po::bool_switch(), "")
 71 | 
 72 | 		("save-output-file", po::bool_switch(), "")
 73 | 		("output-file,o", po::value<std::string>()->default_value(""), "input file")
 74 | 
 75 | 		("output-width", po::value<int>()->default_value(0), "")
 76 | 		("output-height", po::value<int>()->default_value(0), "")
 77 | 		("fix-output-size", po::bool_switch(), "")
 78 | 		;
 79 | 
 80 | 	po::options_description rasterizer_options("Rasterizer options");
 81 | 	rasterizer_options.add_options()
 82 | 		("c-m-cs", po::bool_switch(), "cut, mask table, comb-like scanline")
 83 | 
 84 | 		("lrgb", po::bool_switch(), "")
 85 | 		("srgb", po::bool_switch(), "")
 86 | 
 87 | 		("samples", po::value<int>()->default_value(32), "")
 88 | 		("ms-output", po::bool_switch(), "")
 89 | 
 90 | 		("reserve-ink", po::value<int>()->default_value(0), "reserve ink")
 91 | 		("tiger-clip", po::bool_switch(), "")
 92 | 
 93 | 		("break-before-gl", po::bool_switch(), "break before gl step")
 94 | 
 95 | 		("a128", po::bool_switch(), "align alpha value to 1/128")
 96 | 		;
 97 | 
 98 | 	po::options_description all_options;
 99 | 	all_options.add(general_options).add(io_options).add(rasterizer_options);
100 | 
101 | 	if (argc == 1) {	
102 | 		po::store(parse_command_line_file<char>("vg_default.cfg", all_options), g_config_variables);
103 | 	}
104 | 	else {
105 | 		po::store(po::parse_command_line(argc, argv, all_options), g_config_variables);
106 | 	}
107 | 
108 | 	po::notify(g_config_variables);
109 | 
110 | 	if (g_config_variables.count("help")) {
111 | 		printf("\nLoads \"vg_default.cfg\" by default.\n");
112 | 		printf("Using command line argument will skip config file loading.\n");
113 | 		std::cout << all_options << "\n";
114 | 		return -1;
115 | 	}
116 | 
117 | 	return 0;
118 | 
119 | }
120 | 
121 | }
122 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/modern_gpu/include/device/launchbox.cuh:
--------------------------------------------------------------------------------
 1 | /******************************************************************************
 2 |  * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
 3 |  * 
 4 |  * Redistribution and use in source and binary forms, with or without
 5 |  * modification, are permitted provided that the following conditions are met:
 6 |  *     * Redistributions of source code must retain the above copyright
 7 |  *       notice, this list of conditions and the following disclaimer.
 8 |  *     * Redistributions in binary form must reproduce the above copyright
 9 |  *       notice, this list of conditions and the following disclaimer in the
10 |  *       documentation and/or other materials provided with the distribution.
11 |  *     * Neither the name of the NVIDIA CORPORATION nor the
12 |  *       names of its contributors may be used to endorse or promote products
13 |  *       derived from this software without specific prior written permission.
14 |  * 
15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
18 |  * ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
19 |  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 |  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 |  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 |  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 |  *
26 |  ******************************************************************************/
27 | 
28 | /******************************************************************************
29 |  *
30 |  * Code and text by Sean Baxter, NVIDIA Research
31 |  * See http://nvlabs.github.io/moderngpu for repository and documentation.
32 |  *
33 |  ******************************************************************************/
34 | 
35 | #pragma once
36 | 
37 | #include "../util/mgpucontext.h"
38 | 
39 | namespace mgpu {
40 | 	
// MGPU_SM_TAG names the per-architecture parameter set of a launch box that
// matches the architecture currently being compiled. Everything below 300,
// including passes where __CUDA_ARCH__ is not defined (it then evaluates to 0
// in the #if), uses Sm20.
#if __CUDA_ARCH__ >= 350
	#define MGPU_SM_TAG Sm35
#elif __CUDA_ARCH__ >= 300
	#define MGPU_SM_TAG Sm30
#else
	#define MGPU_SM_TAG Sm20
#endif

// Parameter struct of the template argument `Tuning` for this architecture.
#define MGPU_LAUNCH_PARAMS typename Tuning::MGPU_SM_TAG
// Kernel qualifier: __global__ plus launch bounds taken from that struct's
// NT (threads per block) and OCC (minimum blocks per multiprocessor).
#define MGPU_LAUNCH_BOUNDS __global__ \
	__launch_bounds__(Tuning::MGPU_SM_TAG::NT, Tuning::MGPU_SM_TAG::OCC)
54 | 
// Host-side lookup of a launch box's (NT, VT) pair for a given sm version.
// Derived must expose nested types Sm20, Sm30 and Sm35, each with NT and VT.
template<typename Derived>
struct LaunchBoxRuntime {
	// Looks the pair up for the PTX version reported by `context`.
	static int2 GetLaunchParams(CudaContext& context) {
		return GetLaunchParams(context.PTXVersion());
	}

	// Returns (NT, VT) as (x, y): Sm20 below 300, Sm30 for 300..349 and
	// Sm35 from 350 upwards.
	static int2 GetLaunchParams(int sm) {
		if(sm < 300)
			return make_int2(Derived::Sm20::NT, Derived::Sm20::VT);
		if(sm < 350)
			return make_int2(Derived::Sm30::NT, Derived::Sm30::VT);
		return make_int2(Derived::Sm35::NT, Derived::Sm35::VT);
	}
};
71 | 
// General LaunchBox for any param types.
// Bundles one parameter type per architecture tier; a tier that is not
// specified inherits the type of the tier below it. The runtime lookup
// (GetLaunchParams) comes from the LaunchBoxRuntime base.
template<typename Sm20_, typename Sm30_ = Sm20_, typename Sm35_ = Sm30_>
struct LaunchBox
	: LaunchBoxRuntime<LaunchBox<Sm20_, Sm30_, Sm35_> >
{
	typedef Sm20_ Sm20;
	typedef Sm30_ Sm30;
	typedef Sm35_ Sm35;
};
82 | 
// LaunchBox over (NT, VT, NumBlocks)
// Compile-time holder for one tier's parameters: NT, VT and OCC (OCC is the
// value MGPU_LAUNCH_BOUNDS passes as the second __launch_bounds__ argument).
template<int NT_, int VT_, int OCC_>
struct LaunchParamsVT {
	enum {
		NT = NT_,
		VT = VT_,
		OCC = OCC_
	};
};
// LaunchBox whose three tiers are LaunchParamsVT triples. Only the Sm20
// NT/VT pair is mandatory: OCC defaults to 0 and every higher tier defaults
// to the values of the tier below it.
template<
	int NT_SM20,
	int VT_SM20,
	int OCC_SM20 = 0,
	int NT_SM30 = NT_SM20,
	int VT_SM30 = VT_SM20,
	int OCC_SM30 = OCC_SM20,
	int NT_SM35 = NT_SM30,
	int VT_SM35 = VT_SM30,
	int OCC_SM35 = OCC_SM30>
struct LaunchBoxVT
	: LaunchBox<
		LaunchParamsVT<NT_SM20, VT_SM20, OCC_SM20>,
		LaunchParamsVT<NT_SM30, VT_SM30, OCC_SM30>,
		LaunchParamsVT<NT_SM35, VT_SM35, OCC_SM35> >
{
};
96 | 
97 | } // namespace mgpu
98 | 


--------------------------------------------------------------------------------
/working_directory/shader/R_cut_A_stencil/ms_output_8.vert.glsl:
--------------------------------------------------------------------------------
  1 | 
  2 | #version 450
  3 | 
  4 | // -------- -------- -------- -------- -------- -------- -------- --------
  5 | layout(binding = 0) uniform isamplerBuffer tb_index;
  6 | layout(binding = 1) uniform isamplerBuffer tb_span;
  7 | layout(binding = 2) uniform isamplerBuffer tb_path_fragment;
  8 | layout(binding = 3) uniform isamplerBuffer tb_stencil_mask;
  9 | 
 10 | layout(binding = 4) uniform samplerBuffer tex_table;
 11 | layout(binding = 5) uniform sampler2D tex_ramp;
 12 | 
 13 | // -------- -------- -------- -------- -------- -------- -------- --------
 14 | uniform vec3 pid2depth_irampheight;
 15 | uniform vec3 inv_proj_rx;
 16 | uniform vec3 inv_proj_ry;
 17 | uniform vec3 inv_proj_rw;
 18 | uniform vec3 inv_proj_rp;
 19 | uniform float inv_proj_a;
 20 | 
 21 | uniform bool enable_srgb_correction;
 22 | 
 23 | uniform ivec2 vp_size;
 24 | 
 25 | // -------- -------- -------- -------- -------- -------- -------- --------
 26 | flat out vec4 fragment_color;
 27 | 
 28 | flat out ivec2 path_frag_pos;
 29 | 
 30 | flat out int pixel_mask;
 31 | 
 32 | // -------- -------- -------- -------- -------- -------- -------- --------
 33 | out vec3 gradient_coord_0;
 34 | out vec3 gradient_coord_1;
 35 | flat out vec3 gradient_ramp_coord;
 36 | flat out vec3 gradient_focal_point;
 37 | 
 38 | // -------- -------- -------- -------- -------- -------- -------- --------
 39 | vec4 u8rgba2frgba(int c) {
 40 | 	return vec4(c & 0xFF, (c >> 8) & 0xFF, (c >> 16) & 0xFF, (c >> 24) & 0xFF) / 255.0;
 41 | }
 42 | 
 43 | // -------- -------- -------- -------- -------- -------- -------- --------
 44 | float srgb_to_lrgb_f(float f) {
 45 | 	if (f <= 0.04045f) { return f / 12.92f; }
 46 | 	else { return pow((f + 0.055f) / (1.f + 0.055f), 2.4f); }
 47 | }
 48 | 
 49 | vec4 srgb_to_lrgb(vec4 c) {
 50 | 	return vec4(srgb_to_lrgb_f(c.r), srgb_to_lrgb_f(c.g), srgb_to_lrgb_f(c.b), c.a);
 51 | }
 52 | 
 53 | // -------- -------- -------- --------
 54 | float safeRcpP(float a) { return a > 1e-6 ? 1 / a : 0.0; }
 55 | 
 56 | // -------- -------- -------- --------
 57 | void calc_color(int colori, vec2 vertex) {
 58 | 
 59 | 	if (uint(colori - 1) < uint(0x01000000)) {
 60 | 
 61 | 		// 1. fetch gradient transform & focal point.
 62 | 		int path_id = (colori - 1) * 3;
 63 | 		vec4 word0 = texelFetch(tex_table, path_id);
 64 | 		vec4 word1 = texelFetch(tex_table, path_id + 1);
 65 | 		gradient_focal_point = texelFetch(tex_table, path_id + 2).xyz;
 66 | 
 67 | 		// 2. transform back to object space.
 68 | 		vec2 vertex_0 = vertex + vec2(0.0, 0.5);
 69 | 		vec2 vertex_1 = vertex + vec2(0.0, 1.5);
 70 | 
 71 | 		vec3 rd_0 = inv_proj_rx*vertex_0.x + inv_proj_ry*vertex_0.y + inv_proj_rw;
 72 | 		vec3 rd_1 = inv_proj_rx*vertex_1.x + inv_proj_ry*vertex_1.y + inv_proj_rw;
 73 | 
 74 | 		vec3 obj_space_vertex_0 = inv_proj_rp + (inv_proj_a / rd_0.z)*rd_0;
 75 | 		vec3 obj_space_vertex_1 = inv_proj_rp + (inv_proj_a / rd_1.z)*rd_1;
 76 | 
 77 | 		// 3. transform to gradient space.
 78 | 		gradient_coord_0 = vec3(
 79 | 			obj_space_vertex_0.x*word0.xw +
 80 | 			obj_space_vertex_0.y*vec2(word0.y, word1.x) +
 81 | 			vec2(word0.z, word1.y),
 82 | 			1.0);
 83 | 
 84 | 		gradient_coord_1 = vec3(
 85 | 			obj_space_vertex_1.x*word0.xw +
 86 | 			obj_space_vertex_1.y*vec2(word0.y, word1.x) +
 87 | 			vec2(word0.z, word1.y),
 88 | 			1.0);
 89 | 
 90 | 		// 4. ramp.
 91 | 		int ramp_coordi = floatBitsToInt(word1.z);
 92 | 		gradient_ramp_coord = vec3(
 93 | 			(float(ramp_coordi & 1023) + 0.5)*(1.0 / 1024.0),
 94 | 			(float(ramp_coordi >> 10) + 0.5)*pid2depth_irampheight.z,
 95 | 			word1.w);
 96 | 	}
 97 | 	else {
 98 | 		gradient_ramp_coord.z = 0.0;
 99 | 		vec4 color = u8rgba2frgba(colori);
100 | 		fragment_color = enable_srgb_correction ? srgb_to_lrgb(color) : color;
101 | 	}
102 | }
103 | 
104 | // -------- -------- -------- --------
// Emits the two end points of one line per tb_index entry: gl_VertexID >> 1
// selects the entry, the low bit selects the end point.
void main() {

	int index = gl_VertexID >> 1;
	int line_vi = gl_VertexID & 1;

	ivec4 draw = texelFetch(tb_index, index);

	// draw.x packs the start position: x in the low 16 bits, y above them.
	path_frag_pos = ivec2(draw.x & 0xFFFF, draw.x >> 16);

	// The second vertex is shifted right by draw.y.
	vec2 pos = vec2(path_frag_pos);
	pos.x = path_frag_pos.x + line_vi * draw.y;

	calc_color(draw.z, pos);

	// Move one row up, then map pixels to clip space [-1, 1].
	pos.y += 1;
	vec2 ndc = pos / vec2(vp_size) * 2.0 - 1.0;
	gl_Position = vec4(ndc, 0, 1);

	// draw.w == 0 means "no stencil mask": keep every bit set.
	if (draw.w == 0) {
		pixel_mask = 0xFFFFFFFF;
	}
	else {
		pixel_mask = texelFetch(tb_stencil_mask, draw.w - 1).r;
	}
}
131 | 


--------------------------------------------------------------------------------
/working_directory/shader/R_cut_A_stencil/output_8.vert.glsl:
--------------------------------------------------------------------------------
  1 | 
  2 | #version 450
  3 | 
  4 | // -------- -------- -------- -------- -------- -------- -------- --------
  5 | layout(binding = 0) uniform isamplerBuffer tb_index;
  6 | layout(binding = 1) uniform isamplerBuffer tb_span;
  7 | layout(binding = 2) uniform isamplerBuffer tb_path_fragment;
  8 | layout(binding = 3) uniform isamplerBuffer tb_stencil_mask;
  9 | 
 10 | layout(binding = 4) uniform samplerBuffer tex_table;
 11 | layout(binding = 5) uniform sampler2D tex_ramp;
 12 | 
 13 | // -------- -------- -------- -------- -------- -------- -------- --------
 14 | uniform vec3 pid2depth_irampheight;
 15 | uniform vec3 inv_proj_rx;
 16 | uniform vec3 inv_proj_ry;
 17 | uniform vec3 inv_proj_rw;
 18 | uniform vec3 inv_proj_rp;
 19 | uniform float inv_proj_a;
 20 | 
 21 | uniform bool enable_srgb_correction;
 22 | 
 23 | uniform ivec2 vp_size;
 24 | 
 25 | // -------- -------- -------- -------- -------- -------- -------- --------
 26 | //flat out int fragment_type;
 27 | flat out vec4 fragment_color;
 28 | 
 29 | flat out ivec2 path_frag_pos;
 30 | 
 31 | flat out int pixel_mask;
 32 | 
 33 | // -------- -------- -------- -------- -------- -------- -------- --------
 34 | out vec3 gradient_coord_0;
 35 | out vec3 gradient_coord_1;
 36 | flat out vec3 gradient_ramp_coord;
 37 | flat out vec3 gradient_focal_point;
 38 | 
 39 | // -------- -------- -------- -------- -------- -------- -------- --------
 40 | vec4 u8rgba2frgba(int c) {
 41 | 	return vec4(c & 0xFF, (c >> 8) & 0xFF, (c >> 16) & 0xFF, (c >> 24) & 0xFF) / 255.0;
 42 | }
 43 | 
 44 | // -------- -------- -------- -------- -------- -------- -------- --------
 45 | float srgb_to_lrgb_f(float f) {
 46 | 	if (f <= 0.04045f) { return f / 12.92f; }
 47 | 	else { return pow((f + 0.055f) / (1.f + 0.055f), 2.4f); }
 48 | }
 49 | 
 50 | vec4 srgb_to_lrgb(vec4 c) { // linearize the first three channels; the fourth (alpha) is copied unchanged
 51 | 	return vec4(vec3(srgb_to_lrgb_f(c.x), srgb_to_lrgb_f(c.y), srgb_to_lrgb_f(c.z)), c.w);
 52 | }
 53 | 
 54 | // -------- -------- -------- --------
 55 | float safeRcpP(float a) { if (a > 1e-6) { return 1 / a; } return 0.0; } // 1/a when a > 1e-6, otherwise 0.0
 56 | 
 57 | // -------- -------- -------- --------
 58 | void calc_color(int colori, vec2 vertex) {
 59 | 	// Fills the color-related outputs for one vertex.
 60 | 	//   colori in [1, 0x01000000]: 1-based index of a gradient record in tex_table.
 61 | 	//   any other colori          : packed 8-bit RGBA solid color, R in the low byte.
 62 | 	// vertex is the position handed over by main() before it is mapped to clip space.
 63 | 	bool is_gradient = uint(colori - 1) < uint(0x01000000);
 64 | 	if (!is_gradient) {
 65 | 		// Solid color: only fragment_color and gradient_ramp_coord.z are written.
 66 | 		gradient_ramp_coord.z = 0.0;
 67 | 		vec4 rgba = u8rgba2frgba(colori);
 68 | 		fragment_color = enable_srgb_correction ? srgb_to_lrgb(rgba) : rgba;
 69 | 		return;
 70 | 	}
 71 | 
 72 | 	// 1. fetch gradient transform & focal point (three consecutive texels).
 73 | 	int base = (colori - 1) * 3;
 74 | 	vec4 t0 = texelFetch(tex_table, base);
 75 | 	vec4 t1 = texelFetch(tex_table, base + 1);
 76 | 	gradient_focal_point = texelFetch(tex_table, base + 2).xyz;
 77 | 
 78 | 	// The two words hold a 2x3 affine map: result = x*col_x + y*col_y + shift.
 79 | 	vec2 col_x = t0.xw;
 80 | 	vec2 col_y = vec2(t0.y, t1.x);
 81 | 	vec2 shift = vec2(t0.z, t1.y);
 82 | 
 83 | 	// 2. transform back to object space, at two points one unit apart in y.
 84 | 	vec2 p0 = vertex + vec2(1.0, 0.5);
 85 | 	vec2 p1 = vertex + vec2(1.0, 1.5);
 86 | 
 87 | 	vec3 rd_0 = inv_proj_rx*p0.x + inv_proj_ry*p0.y + inv_proj_rw;
 88 | 	vec3 rd_1 = inv_proj_rx*p1.x + inv_proj_ry*p1.y + inv_proj_rw;
 89 | 
 90 | 	vec3 obj_0 = inv_proj_rp + (inv_proj_a / rd_0.z)*rd_0;
 91 | 	vec3 obj_1 = inv_proj_rp + (inv_proj_a / rd_1.z)*rd_1;
 92 | 
 93 | 	// 3. transform to gradient space.
 94 | 	gradient_coord_0 = vec3(obj_0.x*col_x + obj_0.y*col_y + shift, 1.0);
 95 | 	gradient_coord_1 = vec3(obj_1.x*col_x + obj_1.y*col_y + shift, 1.0);
 96 | 
 97 | 	// 4. ramp: the low 10 bits of the reinterpreted word select x, the bits above select y.
 98 | 	int ramp_bits = floatBitsToInt(t1.z);
 99 | 	gradient_ramp_coord = vec3(
100 | 		(float(ramp_bits & 1023) + 0.5)*(1.0 / 1024.0),
101 | 		(float(ramp_bits >> 10) + 0.5)*pid2depth_irampheight.z,
102 | 		t1.w);
103 | }
104 | 
105 | // -------- -------- -------- --------
106 | void main() {
107 | 	// Two consecutive vertex IDs share one tb_index record; bit 0 picks the endpoint.
108 | 	int record = gl_VertexID >> 1;
109 | 	int end_sel = gl_VertexID & 1;
110 | 
111 | 	ivec4 draw = texelFetch(tb_index, record);
112 | 	int x_extent = draw.y;   // added to x for the second vertex only
113 | 	int color_word = draw.z; // decoded by calc_color()
114 | 	int mask_id = draw.w;    // 0 = no mask entry, otherwise index + 1 into tb_stencil_mask
115 | 
116 | 	// draw.x packs x in the low 16 bits and y in the bits above.
117 | 	path_frag_pos = ivec2(draw.x & 0xFFFF, draw.x >> 16);
118 | 
119 | 	// The sum is integer arithmetic; the vec2 constructor converts it to float.
120 | 	vec2 pos = vec2(path_frag_pos.x + end_sel * x_extent, path_frag_pos.y);
121 | 	calc_color(color_word, pos);
122 | 
123 | 	// Move one unit in y, then map [0, vp_size] onto clip space [-1, 1].
124 | 	pos.y += 1;
125 | 	pos = vec2(pos.x / float(vp_size.x), pos.y / float(vp_size.y)) * 2.0 - 1.0;
126 | 	gl_Position = vec4(pos, 0, 1);
127 | 
128 | 	// With no mask entry every bit of pixel_mask is set.
129 | 	if (mask_id == 0) { pixel_mask = 0xFFFFFFFF; }
130 | 	else { pixel_mask = texelFetch(tb_stencil_mask, mask_id - 1).r; }
131 | }
132 | 


--------------------------------------------------------------------------------
/working_directory/shader/R_cut_A_stencil/output_32.vert.glsl:
--------------------------------------------------------------------------------
  1 | 
  2 | #version 450
  3 | 
  4 | // -------- -------- -------- -------- -------- -------- -------- --------
  5 | layout(binding = 0) uniform isamplerBuffer tb_index; // one ivec4 draw record per pair of vertices, fetched in main()
  6 | layout(binding = 1) uniform isamplerBuffer tb_span; // not referenced in this stage
  7 | layout(binding = 2) uniform isamplerBuffer tb_path_fragment; // not referenced in this stage
  8 | layout(binding = 3) uniform isamplerBuffer tb_stencil_mask; // read in main() at draw.w - 1 when draw.w != 0
  9 | 
 10 | layout(binding = 4) uniform samplerBuffer tex_table; // three texels per gradient, read in calc_color()
 11 | layout(binding = 5) uniform sampler2D tex_ramp; // not referenced in this stage
 12 | 
 13 | // -------- -------- -------- -------- -------- -------- -------- --------
 14 | uniform vec3 pid2depth_irampheight; // only .z is used here: scale applied to the ramp row coordinate
 15 | uniform vec3 inv_proj_rx; // inv_proj_*: coefficients calc_color() uses to map a position back to object space
 16 | uniform vec3 inv_proj_ry;
 17 | uniform vec3 inv_proj_rw;
 18 | uniform vec3 inv_proj_rp;
 19 | uniform float inv_proj_a;
 20 | 
 21 | uniform bool enable_srgb_correction; // when true, solid colors go through srgb_to_lrgb()
 22 | 
 23 | uniform ivec2 vp_size; // divisor used in main() when mapping the position to clip space
 24 | 
 25 | // -------- -------- -------- -------- -------- -------- -------- --------
 26 | flat out vec4 fragment_color; // written by calc_color() on the solid-color path only
 27 | 
 28 | flat out ivec2 path_frag_pos; // (x, y) unpacked from draw.x in main()
 29 | 
 30 | flat out ivec4 pixel_mask; // all bits set when draw.w == 0, otherwise a whole tb_stencil_mask texel
 31 | 
 32 | // -------- -------- -------- -------- -------- -------- -------- --------
 33 | out vec3 gradient_coord_0; // gradient-space position for the vertex offset by 0.5 in y (z is 1.0)
 34 | out vec3 gradient_coord_1; // same, for the vertex offset by 1.5 in y
 35 | flat out vec3 gradient_ramp_coord; // z is set to 0.0 on the solid-color path
 36 | flat out vec3 gradient_focal_point; // xyz of the third texel of the gradient record
 37 | 
 38 | // -------- -------- -------- -------- -------- -------- -------- --------
 39 | vec4 u8rgba2frgba(int c) { // unpack four 8-bit channels (first channel in the low byte) into [0, 1] floats
 40 | 	return vec4((ivec4(c) >> ivec4(0, 8, 16, 24)) & 0xFF) / 255.0;
 41 | }
 42 | 
 43 | // -------- -------- -------- -------- -------- -------- -------- --------
 44 | float srgb_to_lrgb_f(float f) { // one sRGB-encoded channel -> linear
 45 | 	bool in_linear_toe = (f <= 0.04045f); // piecewise curve: linear segment near zero, power curve above it
 46 | 	return in_linear_toe ? f / 12.92f : pow((f + 0.055f) / (1.f + 0.055f), 2.4f);
 47 | }
 48 | 
 49 | vec4 srgb_to_lrgb(vec4 c) { // linearize the first three channels; the fourth (alpha) is copied unchanged
 50 | 	return vec4(vec3(srgb_to_lrgb_f(c.x), srgb_to_lrgb_f(c.y), srgb_to_lrgb_f(c.z)), c.w);
 51 | }
 52 | 
 53 | // -------- -------- -------- --------
 54 | float safeRcpP(float a) { if (a > 1e-6) { return 1 / a; } return 0.0; } // 1/a when a > 1e-6, otherwise 0.0
 55 | 
 56 | // -------- -------- -------- --------
 57 | void calc_color(int colori, vec2 vertex) {
 58 | 	// Fills the color-related outputs for one vertex.
 59 | 	//   colori in [1, 0x01000000]: 1-based index of a gradient record in tex_table.
 60 | 	//   any other colori          : packed 8-bit RGBA solid color, R in the low byte.
 61 | 	// vertex is the position handed over by main() before it is mapped to clip space.
 62 | 	bool is_gradient = uint(colori - 1) < uint(0x01000000);
 63 | 	if (!is_gradient) {
 64 | 		// Solid color: only fragment_color and gradient_ramp_coord.z are written.
 65 | 		gradient_ramp_coord.z = 0.0;
 66 | 		vec4 rgba = u8rgba2frgba(colori);
 67 | 		fragment_color = enable_srgb_correction ? srgb_to_lrgb(rgba) : rgba;
 68 | 		return;
 69 | 	}
 70 | 
 71 | 	// 1. fetch gradient transform & focal point (three consecutive texels).
 72 | 	int base = (colori - 1) * 3;
 73 | 	vec4 t0 = texelFetch(tex_table, base);
 74 | 	vec4 t1 = texelFetch(tex_table, base + 1);
 75 | 	gradient_focal_point = texelFetch(tex_table, base + 2).xyz;
 76 | 
 77 | 	// The two words hold a 2x3 affine map: result = x*col_x + y*col_y + shift.
 78 | 	vec2 col_x = t0.xw;
 79 | 	vec2 col_y = vec2(t0.y, t1.x);
 80 | 	vec2 shift = vec2(t0.z, t1.y);
 81 | 
 82 | 	// 2. transform back to object space, at two points one unit apart in y.
 83 | 	vec2 p0 = vertex + vec2(1.0, 0.5);
 84 | 	vec2 p1 = vertex + vec2(1.0, 1.5);
 85 | 
 86 | 	vec3 rd_0 = inv_proj_rx*p0.x + inv_proj_ry*p0.y + inv_proj_rw;
 87 | 	vec3 rd_1 = inv_proj_rx*p1.x + inv_proj_ry*p1.y + inv_proj_rw;
 88 | 
 89 | 	vec3 obj_0 = inv_proj_rp + (inv_proj_a / rd_0.z)*rd_0;
 90 | 	vec3 obj_1 = inv_proj_rp + (inv_proj_a / rd_1.z)*rd_1;
 91 | 
 92 | 	// 3. transform to gradient space.
 93 | 	gradient_coord_0 = vec3(obj_0.x*col_x + obj_0.y*col_y + shift, 1.0);
 94 | 	gradient_coord_1 = vec3(obj_1.x*col_x + obj_1.y*col_y + shift, 1.0);
 95 | 
 96 | 	// 4. ramp: the low 10 bits of the reinterpreted word select x, the bits above select y.
 97 | 	int ramp_bits = floatBitsToInt(t1.z);
 98 | 	gradient_ramp_coord = vec3(
 99 | 		(float(ramp_bits & 1023) + 0.5)*(1.0 / 1024.0),
100 | 		(float(ramp_bits >> 10) + 0.5)*pid2depth_irampheight.z,
101 | 		t1.w);
102 | }
103 | 
104 | // -------- -------- -------- --------
105 | void main() {
106 | 	// Two consecutive vertex IDs share one tb_index record; bit 0 picks the endpoint.
107 | 	int record = gl_VertexID >> 1;
108 | 	int end_sel = gl_VertexID & 1;
109 | 
110 | 	ivec4 draw = texelFetch(tb_index, record);
111 | 	int x_extent = draw.y;   // added to x for the second vertex only
112 | 	int color_word = draw.z; // decoded by calc_color()
113 | 	int mask_id = draw.w;    // 0 = no mask entry, otherwise index + 1 into tb_stencil_mask
114 | 
115 | 	// draw.x packs x in the low 16 bits and y in the bits above.
116 | 	path_frag_pos = ivec2(draw.x & 0xFFFF, draw.x >> 16);
117 | 
118 | 	// The sum is integer arithmetic; the vec2 constructor converts it to float.
119 | 	vec2 pos = vec2(path_frag_pos.x + end_sel * x_extent, path_frag_pos.y);
120 | 	calc_color(color_word, pos);
121 | 
122 | 	// Move one unit in y, then map [0, vp_size] onto clip space [-1, 1].
123 | 	pos.y += 1;
124 | 	pos = vec2(pos.x / float(vp_size.x), pos.y / float(vp_size.y)) * 2.0 - 1.0;
125 | 	gl_Position = vec4(pos, 0, 1);
126 | 
127 | 	// With no mask entry every bit of all four pixel_mask words is set;
128 | 	// otherwise the four words are one texel of tb_stencil_mask.
129 | 	if (mask_id == 0) { pixel_mask = ivec4(0xFFFFFFFF); }
130 | 	else { pixel_mask = texelFetch(tb_stencil_mask, mask_id - 1); }
131 | }
132 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/vg_config.h:
--------------------------------------------------------------------------------
  1 | 
  2 | #ifndef _MOCHIMAZUI_VG_CONFIG_H_
  3 | #define _MOCHIMAZUI_VG_CONFIG_H_
  4 | 
  5 | #include <cstdint>
  6 | #include <iostream>
  7 | #include <string>
  8 | #include <vector>
  9 | #include <map>
 10 | 
 11 | #include <boost/program_options.hpp>
 12 | 
 13 | #include <glm/glm.hpp>
 14 | 
 15 | #include <mochimazui/config.h>
 16 | 
 17 | #include "rasterizer/shared/ras_pipeline_mode.h"
 18 | 
 19 | namespace Mochimazui {
 20 | 
 21 | namespace PRIVATE {
 22 | extern boost::program_options::variables_map g_config_variables;
 23 | }
 24 | 
 25 | int init_config(int argc, char *argv[]);
 26 | // Returns the value stored under key converted to T; throws std::runtime_error when key is absent.
 27 | template<class T>
 28 | T get_config(const std::string &key) {
 29 | 	using PRIVATE::g_config_variables;
 30 | 	if (g_config_variables.count(key)) {
 31 | 		return g_config_variables[key].as<T>();
 32 | 	}
 33 | 	throw std::runtime_error("Mochimazui::get_config: \"" + key + "\" not set");
 34 | }
 35 | 
 36 | // -------- -------- -------- -------- -------- -------- -------- --------
 37 | // Typed accessors: each one reads a single option through get_config.
 38 | namespace VGConfig {
 39 | 
 40 | // general
 41 | 
 42 | inline bool Help() { return get_config<bool>("help"); }
 43 | 
 44 | inline bool Verbose() { return get_config<bool>("verbose"); }
 45 | inline bool GLDebug() { return get_config<bool>("gl-debug"); }
 46 | inline bool DrawCurve() { return get_config<bool>("draw-curve"); }
 47 | inline bool ShowFPS() { return get_config<bool>("show-fps"); }
 48 | 
 49 | inline bool Benchmark() { return get_config<bool>("benchmark"); }
 50 | inline bool StepTiming() { return get_config<bool>("step-timing"); }
 51 | inline std::string AttachTimingToFile() { // plain value return: a const-qualified value would block moves
 52 | 	return get_config<std::string>("attach-timing-to");
 53 | }
 54 | 
 55 | inline bool MergeAdjacentPath() { return get_config<bool>("merge-path"); }
 56 | //inline bool MinimalUI() { return get_config<bool>("minimal-ui"); }
 57 | inline bool MinimalUI() { return true; } // hard-coded; the config lookup above is disabled
 58 | 
 59 | inline bool OutputVerticalFlip() { return get_config<bool>("v-flip"); }
 60 | 
 61 | inline bool Animation() { return get_config<bool>("animation"); }
 62 | 
 63 | // input / output
 64 | 
 65 | inline std::string Name() { return get_config<std::string>("input-name"); } // same key as InputName()
 66 | inline std::string InputName() { return get_config<std::string>("input-name"); }
 67 | inline std::string InputFile() { return get_config<std::string>("input-file"); }
 68 | inline int InputWidth() { return get_config<int>("input-width"); }
 69 | inline int InputHeight() { return get_config<int>("input-height"); }
 70 | 
 71 | inline int WindowWidth() { return get_config<int>("window-width"); }
 72 | inline int WindowHeight() { return get_config<int>("window-height"); }
 73 | inline glm::ivec2 WindowSize() {
 74 | 	return glm::ivec2(WindowWidth(), WindowHeight());
 75 | }
 76 | 
 77 | inline bool FitVGToWindowSize() { return get_config<bool>("fit-to-window"); }
 78 | inline bool FitWindowToVGSize() { return get_config<bool>("fit-to-vg"); }
 79 | 
 80 | inline bool SaveOutputFile() { return get_config<bool>("save-output-file"); }
 81 | inline std::string OutputFile() { return get_config<std::string>("output-file"); }
 82 | 
 83 | inline int OutputWidth() { return get_config<int>("output-width"); }
 84 | inline int OutputHeight() { return get_config<int>("output-height"); }
 85 | inline glm::ivec2 OutputSize() {
 86 | 	return glm::ivec2(OutputWidth(), OutputHeight());
 87 | }
 88 | inline bool FixOutputSize() { return get_config<bool>("fix-output-size"); }
 89 | 
 90 | // rasterizer config
 91 | 
 92 | inline RasterizerPipelineMode PipelineMode() {
 93 | 	// Both settings of "c-m-cs" select the same mode, so no branch is needed.
 94 | 	// The option is still read: a missing "c-m-cs" must keep throwing from
 95 | 	// get_config exactly as it did when this was an if/else with two
 96 | 	// identical branches.
 97 | 	(void)get_config<bool>("c-m-cs");
 98 | 	return PM_Cut_Mask_Comb_Scanline;
 99 | }
100 | 
101 | inline bool linearRGB() { return get_config<bool>("lrgb"); }
102 | inline bool sRGB() { return get_config<bool>("srgb"); }
103 | 
104 | inline int Samples() { return get_config<int>("samples"); }
105 | inline bool MultisampleOutput() { return get_config<bool>("ms-output"); }
106 | 
107 | inline bool UseMaskTable() { return true; } // hard-coded, not read from the option map
108 | 
109 | inline int ReserveInk() { return get_config<int>("reserve-ink"); }
110 | inline bool TigerClip() { return get_config<bool>("tiger-clip"); }
111 | 
112 | inline bool BreakBeforeGL() { return get_config<bool>("break-before-gl"); }
113 | 
114 | inline bool A128() { return get_config<bool>("a128"); }
115 | 
116 | inline bool CountPixel() { return get_config<bool>("count-pixel"); }
117 | inline std::string AttachPixelCountToFile() { return get_config<std::string>("attach-pixel-count-to"); }
118 | 
119 | }
120 | 
121 | }
122 | 
123 | #endif
124 | 


--------------------------------------------------------------------------------
/working_directory/shader/R_cut_A_stencil/ms_output_32.vert.glsl:
--------------------------------------------------------------------------------
  1 | 
  2 | #version 450
  3 | 
  4 | // -------- -------- -------- -------- -------- -------- -------- --------
  5 | layout(binding = 0) uniform isamplerBuffer tb_index; // one ivec4 draw record per pair of vertices, fetched in main()
  6 | layout(binding = 1) uniform isamplerBuffer tb_span; // not referenced in this stage
  7 | layout(binding = 2) uniform isamplerBuffer tb_path_fragment; // not referenced in this stage
  8 | layout(binding = 3) uniform isamplerBuffer tb_stencil_mask; // read in main() at draw.w - 1 when draw.w != 0
  9 | 
 10 | layout(binding = 4) uniform samplerBuffer tex_table; // three texels per gradient, read in calc_color()
 11 | layout(binding = 5) uniform sampler2D tex_ramp; // not referenced in this stage
 12 | 
 13 | // -------- -------- -------- -------- -------- -------- -------- --------
 14 | uniform vec3 pid2depth_irampheight; // only .z is used here: scale applied to the ramp row coordinate
 15 | uniform vec3 inv_proj_rx; // inv_proj_*: coefficients calc_color() uses to map a position back to object space
 16 | uniform vec3 inv_proj_ry;
 17 | uniform vec3 inv_proj_rw;
 18 | uniform vec3 inv_proj_rp;
 19 | uniform float inv_proj_a;
 20 | 
 21 | uniform bool enable_srgb_correction; // when true, solid colors go through srgb_to_lrgb()
 22 | 
 23 | uniform ivec2 vp_size; // divisor used in main() when mapping the position to clip space
 24 | 
 25 | // -------- -------- -------- -------- -------- -------- -------- --------
 26 | //flat out int fragment_type;
 27 | flat out vec4 fragment_color; // written by calc_color() on the solid-color path only
 28 | 
 29 | flat out ivec2 path_frag_pos; // (x, y) unpacked from draw.x in main()
 30 | 
 31 | flat out ivec4 pixel_mask; // all bits set when draw.w == 0, otherwise a whole tb_stencil_mask texel
 32 | 
 33 | // -------- -------- -------- -------- -------- -------- -------- --------
 34 | out vec3 gradient_coord_0; // gradient-space position for the vertex offset by 0.5 in y (z is 1.0)
 35 | out vec3 gradient_coord_1; // same, for the vertex offset by 1.5 in y
 36 | flat out vec3 gradient_ramp_coord; // z is set to 0.0 on the solid-color path
 37 | flat out vec3 gradient_focal_point; // xyz of the third texel of the gradient record
 38 | 
 39 | // -------- -------- -------- -------- -------- -------- -------- --------
 40 | vec4 u8rgba2frgba(int c) { // unpack four 8-bit channels (first channel in the low byte) into [0, 1] floats
 41 | 	return vec4((ivec4(c) >> ivec4(0, 8, 16, 24)) & 0xFF) / 255.0;
 42 | }
 43 | 
 44 | // -------- -------- -------- -------- -------- -------- -------- --------
 45 | float srgb_to_lrgb_f(float f) { // one sRGB-encoded channel -> linear
 46 | 	bool in_linear_toe = (f <= 0.04045f); // piecewise curve: linear segment near zero, power curve above it
 47 | 	return in_linear_toe ? f / 12.92f : pow((f + 0.055f) / (1.f + 0.055f), 2.4f);
 48 | }
 49 | 
 50 | vec4 srgb_to_lrgb(vec4 c) { // linearize the first three channels; the fourth (alpha) is copied unchanged
 51 | 	return vec4(vec3(srgb_to_lrgb_f(c.x), srgb_to_lrgb_f(c.y), srgb_to_lrgb_f(c.z)), c.w);
 52 | }
 53 | 
 54 | // -------- -------- -------- --------
 55 | float safeRcpP(float a) { if (a > 1e-6) { return 1 / a; } return 0.0; } // 1/a when a > 1e-6, otherwise 0.0
 56 | 
 57 | // -------- -------- -------- --------
 58 | void calc_color(int colori, vec2 vertex) {
 59 | 	// Fills the color-related outputs for one vertex.
 60 | 	//   colori in [1, 0x01000000]: 1-based index of a gradient record in tex_table.
 61 | 	//   any other colori          : packed 8-bit RGBA solid color, R in the low byte.
 62 | 	// vertex is the position handed over by main() before it is mapped to clip space.
 63 | 	bool is_gradient = uint(colori - 1) < uint(0x01000000);
 64 | 	if (!is_gradient) {
 65 | 		// Solid color: only fragment_color and gradient_ramp_coord.z are written.
 66 | 		gradient_ramp_coord.z = 0.0;
 67 | 		vec4 rgba = u8rgba2frgba(colori);
 68 | 		fragment_color = enable_srgb_correction ? srgb_to_lrgb(rgba) : rgba;
 69 | 		return;
 70 | 	}
 71 | 
 72 | 	// 1. fetch gradient transform & focal point (three consecutive texels).
 73 | 	int base = (colori - 1) * 3;
 74 | 	vec4 t0 = texelFetch(tex_table, base);
 75 | 	vec4 t1 = texelFetch(tex_table, base + 1);
 76 | 	gradient_focal_point = texelFetch(tex_table, base + 2).xyz;
 77 | 
 78 | 	// The two words hold a 2x3 affine map: result = x*col_x + y*col_y + shift.
 79 | 	vec2 col_x = t0.xw;
 80 | 	vec2 col_y = vec2(t0.y, t1.x);
 81 | 	vec2 shift = vec2(t0.z, t1.y);
 82 | 
 83 | 	// 2. transform back to object space, at two points one unit apart in y.
 84 | 	vec2 p0 = vertex + vec2(0.0, 0.5);
 85 | 	vec2 p1 = vertex + vec2(0.0, 1.5);
 86 | 
 87 | 	vec3 rd_0 = inv_proj_rx*p0.x + inv_proj_ry*p0.y + inv_proj_rw;
 88 | 	vec3 rd_1 = inv_proj_rx*p1.x + inv_proj_ry*p1.y + inv_proj_rw;
 89 | 
 90 | 	vec3 obj_0 = inv_proj_rp + (inv_proj_a / rd_0.z)*rd_0;
 91 | 	vec3 obj_1 = inv_proj_rp + (inv_proj_a / rd_1.z)*rd_1;
 92 | 
 93 | 	// 3. transform to gradient space.
 94 | 	gradient_coord_0 = vec3(obj_0.x*col_x + obj_0.y*col_y + shift, 1.0);
 95 | 	gradient_coord_1 = vec3(obj_1.x*col_x + obj_1.y*col_y + shift, 1.0);
 96 | 
 97 | 	// 4. ramp: the low 10 bits of the reinterpreted word select x, the bits above select y.
 98 | 	int ramp_bits = floatBitsToInt(t1.z);
 99 | 	gradient_ramp_coord = vec3(
100 | 		(float(ramp_bits & 1023) + 0.5)*(1.0 / 1024.0),
101 | 		(float(ramp_bits >> 10) + 0.5)*pid2depth_irampheight.z,
102 | 		t1.w);
103 | }
104 | 
105 | // -------- -------- -------- --------
106 | void main() {
107 | 	// Two consecutive vertex IDs share one tb_index record; bit 0 picks the endpoint.
108 | 	int record = gl_VertexID >> 1;
109 | 	int end_sel = gl_VertexID & 1;
110 | 
111 | 	ivec4 draw = texelFetch(tb_index, record);
112 | 	int x_extent = draw.y;   // added to x for the second vertex only
113 | 	int color_word = draw.z; // decoded by calc_color()
114 | 	int mask_id = draw.w;    // 0 = no mask entry, otherwise index + 1 into tb_stencil_mask
115 | 
116 | 	// draw.x packs x in the low 16 bits and y in the bits above.
117 | 	path_frag_pos = ivec2(draw.x & 0xFFFF, draw.x >> 16);
118 | 
119 | 	// The sum is integer arithmetic; the vec2 constructor converts it to float.
120 | 	vec2 pos = vec2(path_frag_pos.x + end_sel * x_extent, path_frag_pos.y);
121 | 	calc_color(color_word, pos);
122 | 
123 | 	// Move one unit in y, then map [0, vp_size] onto clip space [-1, 1].
124 | 	pos.y += 1;
125 | 	pos = vec2(pos.x / float(vp_size.x), pos.y / float(vp_size.y)) * 2.0 - 1.0;
126 | 	gl_Position = vec4(pos, 0, 1);
127 | 
128 | 	// With no mask entry every bit of all four pixel_mask words is set;
129 | 	// otherwise the four words are one texel of tb_stencil_mask.
130 | 	if (mask_id == 0) { pixel_mask = ivec4(0xFFFFFFFF); }
131 | 	else { pixel_mask = texelFetch(tb_stencil_mask, mask_id - 1); }
132 | }
133 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/modern_gpu/include/util/util.h:
--------------------------------------------------------------------------------
  1 | /******************************************************************************
  2 |  * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
  3 |  * 
  4 |  * Redistribution and use in source and binary forms, with or without
  5 |  * modification, are permitted provided that the following conditions are met:
  6 |  *     * Redistributions of source code must retain the above copyright
  7 |  *       notice, this list of conditions and the following disclaimer.
  8 |  *     * Redistributions in binary form must reproduce the above copyright
  9 |  *       notice, this list of conditions and the following disclaimer in the
 10 |  *       documentation and/or other materials provided with the distribution.
 11 |  *     * Neither the name of the NVIDIA CORPORATION nor the
 12 |  *       names of its contributors may be used to endorse or promote products
 13 |  *       derived from this software without specific prior written permission.
 14 |  * 
 15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
 16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
 18 |  * ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
 19 |  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 20 |  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 23 |  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 24 |  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 25 |  *
 26 |  ******************************************************************************/
 27 | 
 28 | /******************************************************************************
 29 |  *
 30 |  * Code and text by Sean Baxter, NVIDIA Research
 31 |  * See http://nvlabs.github.io/moderngpu for repository and documentation.
 32 |  *
 33 |  ******************************************************************************/
 34 | 
 35 | #pragma once
 36 | 
 37 | #include "static.h"
 38 | 
 39 | namespace mgpu {
 40 | 
 41 | int Rand(int min, int max);
 42 | int64 Rand(int64 min, int64 max);
 43 | uint Rand(uint min, uint max);
 44 | uint64 Rand(uint64 min, uint64 max);
 45 | float Rand(float min, float max);
 46 | double Rand(double min, double max);
 47 | 
 48 | 
 49 | ////////////////////////////////////////////////////////////////////////////////
 50 | // intrusive_ptr
 51 | 
 52 | // Stand-in for boost::noncopyable so this header does not need boost.
 53 | class noncopyable {
 54 | protected:
 55 | 	noncopyable() {}
 56 | 	~noncopyable() {}
 57 | private:
 58 | 	noncopyable(const noncopyable&) { } // private: not callable by users or derived classes
 59 | 	const noncopyable& operator=(const noncopyable&) { return *this; }
 60 | };
 61 | 
 62 | class CudaBase : public noncopyable {
 63 | public:
 64 | 	CudaBase() : _ref(0) { } // a fresh object starts with no owners
 65 | 	virtual ~CudaBase() { }
 66 | 	virtual long AddRef() {
 67 | 		// Plain increment; the interlocked variant (BOOST_INTERLOCKED_INCREMENT) is disabled.
 68 | 		_ref += 1; return _ref;
 69 | 	}
 70 | 	virtual void Release() {
 71 | 		// Plain decrement; the object deletes itself when the count reaches zero.
 72 | 		--_ref; if(_ref == 0) { delete this; }
 73 | 	}
 74 | private:
 75 | 	long _ref;
 76 | };
 77 | 
 78 | inline long intrusive_ptr_add_ref(CudaBase* base) {
 79 | 	return base->AddRef();
 80 | }
 81 | 
 82 | inline void intrusive_ptr_release(CudaBase* base) {
 83 | 	base->Release();
 84 | }
 85 | 
 86 | template<typename T>
 87 | class intrusive_ptr {
 88 | public:
 89 | 	intrusive_ptr() : _p(0) { }
 90 | 	explicit intrusive_ptr(T* p) : _p(p) {
 91 | 		if(_p != 0) { intrusive_ptr_add_ref(_p); }
 92 | 	}
 93 | 	intrusive_ptr(const intrusive_ptr<T>& rhs) : _p(rhs._p) {
 94 | 		if(_p != 0) { intrusive_ptr_add_ref(_p); }
 95 | 	}
 96 | 	~intrusive_ptr() {
 97 | 		if(_p != 0) { intrusive_ptr_release(_p); }
 98 | 	}
 99 | 	intrusive_ptr& operator=(const intrusive_ptr& rhs) {
100 | 		intrusive_ptr incoming(rhs._p); incoming.swap(*this); // the old target is released by incoming's destructor
101 | 		return *this;
102 | 	}
103 | 
104 | 	void reset(T* p = 0) {
105 | 		intrusive_ptr replacement(p); replacement.swap(*this); // same copy-and-swap as operator=
106 | 	}
107 | 	T* release() {
108 | 		T* detached = 0; // hands the pointer back without touching the count
109 | 		std::swap(detached, _p);
110 | 		return detached;
111 | 	}
112 | 
113 | 	T* get() const { return _p; }
114 | 	operator T*() const { return _p; }
115 | 	T* operator->() const { return _p; }
116 | 	
117 | 	void swap(intrusive_ptr& rhs) {
118 | 		T* mine = _p; _p = rhs._p; rhs._p = mine;
119 | 	}
120 | private:
121 | 	T* _p;
122 | };
123 | 
124 | } // namespace mgpu
125 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/mochimazui/camera_controller_3d.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #include "camera_controller_3d.h"
  3 | 
  4 | namespace Mochimazui {
  5 | 
  6 | 	void CameraController3D::init(int width, int height)  { // store the scene size; reset pivot, normal and walk axes
  7 | 		_sceneWidth = width;
  8 | 		_sceneHeight = height;
  9 | 		_rotateCenter = glm::vec3(width / 2.f, height / 2.f, 0.f); // center of the width x height rectangle
 10 | 		_rotateNormal = glm::vec3(0.f, 0.f, 1.f);
 11 | 		_walkx = glm::vec3(1.f, 0.f, 0.f);
 12 | 		_walky = glm::vec3(0.f, -1.f, 0.f);
 13 | 	}
 14 | 
 15 | 	void CameraController3D::fitToView(int vWidth, int vHeight) {
 16 | 		// Resets the camera, scales uniformly so the scene fits vWidth x vHeight, then translates by half the leftover space.
 17 | 		//vWidth += 1;
 18 | 		//vHeight += 1;
 19 | 
 20 | 		Camera3D::reset();
 21 | 
 22 | 		float sw = (float)vWidth / (float)_sceneWidth;
 23 | 		float sh = (float)vHeight / (float)_sceneHeight;
 24 | 		float s = sw < sh ? sw : sh; // the smaller ratio, so both dimensions fit
 25 | 		Camera3D::scale(glm::vec3(s, s, s));
 26 | 
 27 | 		float nw = _sceneWidth * s;
 28 | 		float nh = _sceneHeight * s;
 29 | 		// glm::abs always picks the float overload; an unqualified abs() can bind to abs(int) and truncate.
 30 | 		float dw = glm::abs(vWidth - nw) * .5f;
 31 | 		float dh = glm::abs(vHeight - nh) * .5f;
 32 | 
 33 | 		Camera3D::translate(glm::vec3(dw, dh, 0));
 34 | 		// NOTE(review): this stores the view size as the scene size; confirm that is intended.
 35 | 		init(vWidth, vHeight);
 36 | 	}
 37 | 
 38 | 	// Product of three matrices built from the scene size (W, H) and S = min(W, H).
 39 | 	glm::mat4x4 CameraController3D::projectionMatrix() {
 40 | 		// glm::mat4x4 takes its 16 values column by column, so the last four values of each literal are the 4th column.
 41 | 		float W = (float)_sceneWidth;
 42 | 		float H = (float)_sceneHeight;
 43 | 		float W2 = W / 2.f;
 44 | 		float H2 = H / 2.f;
 45 | 
 46 | 		float S = W < H ? W : H;
 47 | 
 48 | 		auto mat =
 49 | 			glm::mat4x4(
 50 | 			1, 0, 0, 0,
 51 | 			0, 1, 0, 0,
 52 | 			0, 0, 1, 0,
 53 | 			W2, H2, -S, 1
 54 | 			)
 55 | 			*
 56 | 			glm::mat4x4(
 57 | 			S, 0, 0, 0,
 58 | 			0, S, 0, 0,
 59 | 			0, 0, 1, 1,
 60 | 			0, 0, 0, 0
 61 | 			)
 62 | 			*
 63 | 			glm::mat4x4(
 64 | 			1, 0, 0, 0,
 65 | 			0, 1, 0, 0,
 66 | 			0, 0, 1, 0,
 67 | 			-W2, -H2, S, 1
 68 | 			);
 69 | 
 70 | 		return mat;
 71 | 	}
 72 | 
 73 | 	// The model-view matrix is the underlying Camera3D matrix, returned as is.
 74 | 	glm::mat4x4 CameraController3D::modelViewMatrix() {
 75 | 		return Camera3D::matrix();
 76 | 	}
 77 | 
 78 | 	void CameraController3D::leftButtonDown(int x, int y) { // mark the button held; remember where the drag started
 79 | 		_leftButton = true;
 80 | 		_lastPos = _leftButtonClickPos = ivec2(x, y);
 81 | 	}
 82 | 
 83 | 	void CameraController3D::leftButtonUp(int x, int y) { // x and y are unused
 84 | 		_leftButton = false;
 85 | 	}
 86 | 
 87 | 	void CameraController3D::rightButtonDown(int x, int y) { // mark the button held; remember where the drag started
 88 | 		_rightButton = true;
 89 | 		_lastPos = _rightButtonClickPos = ivec2(x, y);
 90 | 	}
 91 | 
 92 | 	void CameraController3D::rightButtonUp(int x, int y) { // x and y are unused
 93 | 		_rightButton = false;
 94 | 	}
 95 | 
 96 | 	void CameraController3D::move(int x, int y) { // dispatch a cursor move to the handler for the current mode
 97 | 		switch (_controllerMode) {
 98 | 		case CCM_NULL:
 99 | 			break;
100 | 		case CCM_MOVE:
101 | 			handleMove(x, y);
102 | 			break;
103 | 		case CCM_TURN:
104 | 			handleTurn(x, y);
105 | 			break;
106 | 		case CCM_ROTATE:
107 | 			handleRotate(x, y);
108 | 			break;
109 | 		default:
110 | 			throw std::runtime_error("CameraController3D::unsupported controller mode");
111 | 		}
112 | 	}
113 | 
114 | 	// -------- -------- -------- -------- -------- -------- -------- --------
115 | 	void CameraController3D::handleMove(int x, int y) { // drag with either button: translate along the walk axes
116 | 		ivec2 cp(x, y);
117 | 		if (_leftButton || _rightButton) {
118 | 			ivec2 delta = cp - _lastPos;
119 | 			delta.y *= -1;
120 | 			Camera3D::translate(delta.x * _walkx + delta.y * _walky);
121 | 		}
122 | 		_lastPos = cp; // updated even when no button is held
123 | 	}
124 | 
125 | 	void CameraController3D::handleTurn(int x, int y) {
126 | 		// Left-button drag: the scaled y drag distance drives Camera3D::turn_y and re-orients the stored axes.
127 | 		if (!_leftButton) {	return;}
128 | 
129 | 		static const double RV = 0.01;
130 | 		ivec2 cp(x, y);
131 | 		glm::vec2 delta = cp - _lastPos;
132 | 		//delta.y *= -1;
133 | 		delta *= RV;
134 | 
135 | 		auto tv3 = [](const glm::mat4x4& m, glm::vec3 &r3) { // rotate a direction (w = 0) by m and renormalize it
136 | 			auto r4 = glm::vec4(r3.x, r3.y, r3.z, 0.f);
137 | 			r4 = m * r4;
138 | 			r4 = glm::normalize(r4);
139 | 			r3 = glm::vec3(r4.x, r4.y, r4.z);
140 | 		};
141 | 
142 | 		Camera3D::turn_y(delta.y, _rotateCenter.y);
143 | 		auto m = glm::rotate(delta.y, glm::vec3(1, 0, 0)); // same angle, about the x axis
144 | 		tv3(m, _rotateNormal);
145 | 		tv3(m, _walkx);
146 | 		tv3(m, _walky);
147 | 
148 | 		_lastPos = cp;
149 | 	}
150 | 
151 | 	void CameraController3D::handleRotate(int x, int y) {
152 | 		// Left-button drag: the scaled x drag distance is passed to Camera3D::rotate_cn with the stored center and normal.
153 | 		if (!_leftButton) { return; }
154 | 
155 | 		static const double RV = 0.001; // drag distance -> angle scale
156 | 		ivec2 cp(x, y);
157 | 		glm::vec2 delta = cp - _lastPos;
158 | 		delta *= RV;
159 | 
160 | 		// Only delta.x is consumed below. The previous version also negated
161 | 		// delta.y, declared a vector-transform lambda and built
162 | 		// glm::rotate(delta.x, _rotateNormal), but never used any of them;
163 | 		// that dead code has been removed.
164 | 		// NOTE(review): handleTurn() applies its rotation to _walkx/_walky,
165 | 		// this handler does not; confirm whether the walk axes should follow
166 | 		// the rotation here as well.
167 | 
168 | 		Camera3D::rotate_cn(delta.x, _rotateCenter, _rotateNormal);
169 | 
170 | 		// Remember the cursor for the next drag event.
171 | 		_lastPos = cp;
172 | 	}
173 | 
174 | }


--------------------------------------------------------------------------------
/gpu-scanline/src/rasterizer/shared/ras_define.h:
--------------------------------------------------------------------------------
  1 | 
  2 | #ifndef _MOCHIMAZUI_RASTERIZER_SHARED_DEFINE_H_
  3 | #define _MOCHIMAZUI_RASTERIZER_SHARED_DEFINE_H_
  4 | 
  5 | #include <cuda.h>
  6 | #include <cuda_runtime.h>
  7 | 
  8 | #include <mochimazui/3rd/helper_cuda.h>
  9 | 
 10 | // -------- -------- -------- -------- -------- -------- -------- --------
 11 | // CONST
 12 | 
 13 | //#define VG_RASTERIZER_FRAGMENT_TEXTURE_WIDTH (1024)
 14 | //#define VG_RASTERIZER_FRAGMENT_TEXTURE_HEIGHT (1024)
 15 | 
 16 | //#define VG_RASTERIZER_FRAGMENT_TEXTURE_WIDTH (2048)
 17 | //#define VG_RASTERIZER_FRAGMENT_TEXTURE_HEIGHT (2048)
 18 | 
 19 | #define VG_RASTERIZER_FRAGMENT_TEXTURE_WIDTH (4096)
 20 | #define VG_RASTERIZER_FRAGMENT_TEXTURE_HEIGHT (4096)
 21 | 
 22 | #define VG_RASTERIZER_BIG_FRAGMENT_SIZE (2)
 23 | 
 24 | // -------- -------- -------- -------- -------- -------- -------- --------
 25 | // CUDA: wait for the device with cudaDeviceSynchronize, then pass msg to __getLastCudaError.
 26 | 
 27 | #define CUDA_DEVICE_SYNC_AND_CHECK_ERROR(msg) { \
 28 | 	cudaDeviceSynchronize(); __getLastCudaError (msg, __FILE__, __LINE__); }
 29 | 
 30 | // Debug-only variants: real calls when _DEBUG is defined, empty expansions otherwise.
 31 | #ifdef _DEBUG
 32 | #define DEBUG_CUDA_DEVICE_SYNC() cudaDeviceSynchronize()
 33 | #define DEBUG_CUDA_DEVICE_SYNC_AND_CHECK_ERROR(msg) { \
 34 | 	cudaDeviceSynchronize(); __getLastCudaError (msg, __FILE__, __LINE__); }
 35 | #else
 36 | #define DEBUG_CUDA_DEVICE_SYNC()
 37 | #define DEBUG_CUDA_DEVICE_SYNC_AND_CHECK_ERROR(msg)
 38 | //#define DEBUG_CUDA_DEVICE_SYNC_AND_CHECK_ERROR(msg) { \
 39 | //	cudaDeviceSynchronize(); __getLastCudaError (msg, __FILE__, __LINE__); }
 40 | //#define DEBUG_CUDA_DEVICE_SYNC_AND_CHECK_ERROR(msg) {timer_print(msg,__FILE__, __LINE__);}
 41 | #endif
 42 | 
 43 | #define GET_ID() (blockDim.x * blockIdx.x + threadIdx.x) // global thread index along x
 44 | 
 45 | inline int divup(int a, int b) { const int bias = b - 1; return (a + bias) / b; } // a / b rounded up, for a >= 0 and b > 0
 46 | #define LAUNCH(kernel,N,NT,args) {kernel <<< divup(N,NT),NT >>>args; \
 47 |     DEBUG_CUDA_DEVICE_SYNC_AND_CHECK_ERROR(#kernel);} // divup(N, NT) blocks of NT threads; args is the parenthesized argument list
 48 | //#define LAUNCH(kernel,N,NT,args) {kernel <<< (N+(NT-1))/NT,NT >>>args; \
 49 | //	DEBUG_CUDA_DEVICE_SYNC_AND_CHECK_ERROR(#kernel);}
 50 | 
 51 | #define ASSERT(a) // expands to nothing, so the argument is never evaluated
 52 | 
 53 | // -------- -------- -------- -------- -------- -------- -------- --------
 54 | // GL 
 55 | 
 56 | #ifdef _DEBUG
 57 | #define DEBUG_GL_FINISH() glFinish() // with _DEBUG defined this is a real glFinish() call
 58 | #else
 59 | #define DEBUG_GL_FINISH() // without _DEBUG this expands to nothing
 60 | //#define DEBUG_GL_FINISH() glFinish()
 61 | #endif
 62 | // Keep the spaces around " ": since C++11 a string literal directly followed by an identifier is lexed as a user-defined-literal suffix.
 63 | #define SHADER_DEFINE_TEXT(text) #text // stringize the argument
 64 | #define SHADER_DEFINE(name) #name " " SHADER_DEFINE_TEXT(name) // yields "<name> <expanded value of name>"
 65 | #define SHADER_REDEFINE(new_name, old_name) #new_name " " SHADER_DEFINE_TEXT(old_name) // yields "<new_name> <expanded value of old_name>"
 66 | 
 67 | #ifdef __CUDACC__
 68 | #ifndef __ldg // NOTE(review): this only tests for a macro named __ldg; confirm it does not also shadow the CUDA __ldg intrinsic
 69 | #define __ldg(a) (*(a)) // fallback: a plain dereference
 70 | #pragma comment( user, "__ldg not defined" ) 
 71 | #endif
 72 | #endif
 73 | 
 74 | // -------- -------- -------- -------- -------- -------- -------- --------
 75 | //#define QM_MASK_TABLE_RES 256
 76 | //#define QM_MASK_TABLE_PACKING_SCALE 0.5f
 77 | //#define QM_MASK_TABLE_N_SAMPLES 128
 78 | //#define QM_MASK_TABLE_FETCH_TEST_RES 33
 79 | 
 80 | // -------- -------- -------- -------- -------- -------- -------- --------
 81 | 
 82 | //#define ENABLE_MPVG_SHIFT
 83 | //#define ENABLE_NVPR_SHIFT
 84 | 
 85 | // -------- -------- -------- -------- -------- -------- -------- --------
 86 | // -------- -------- -------- -------- -------- -------- -------- --------
 87 | // path visible flag:
 88 | //
 89 | //         y > height
 90 | //
 91 | //         5 | 6 | 7
 92 | // x < 0   3 | x | 4  x > width
 93 | //         0 | 1 | 2 
 94 | //
 95 | //           y < 0
 96 | //
 97 | 
 98 | // -------- -------- -------- --------
 99 | // Big endian
100 | // 012: 11100000 0x0101010000000000 : ~ 0x0000000101010101
101 | // 247: 00101001 0x0000010001000001 : ~ 0x0101000100010100
102 | // 567: 00000111 0x0000000000010101 : ~ 0x0101010101000000
103 | // 035: 10010100 0x0100000100010000 : ~ 0x0001010001000101
104 | 
105 | //#define PATH_INVISIBLE(mask) ( \
106 | //	(!(mask & 0x0000000101010101)) \
107 | // || (!(mask & 0x0101000100010100))  \
108 | // || (!(mask & 0x0101010101000000))  \
109 | // || (!(mask & 0x0001010001000101))  \
110 | //)
111 | 
112 | // -------- -------- -------- --------
113 | // little endian
114 | // 012: 11100000 00000111 : ~ 0x0101010101000000
115 | // 247: 00101001 10010100 : ~ 0x0001010001000101
116 | // 567: 00000111 11100000 : ~ 0x0000000101010101
117 | // 035: 10010100 00101001 : ~ 0x0101000100010100
118 | // True when mask has no bit outside one of the sets 012, 247, 567 or 035; mask is parenthesized so expression arguments are masked as a whole.
119 | #define PATH_INVISIBLE(mask) ( \
120 | 	(!((mask) & 0x0101010101000000)) \
121 |  || (!((mask) & 0x0001010001000101))  \
122 |  || (!((mask) & 0x0000000101010101))  \
123 |  || (!((mask) & 0x0101000100010100))  \
124 | )
125 | 
126 | // -------- -------- -------- --------
127 | #define PATH_VISIBLE(mask) (!(PATH_INVISIBLE(mask)))
128 | 
129 | #endif
130 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | ## Ignore Visual Studio temporary files, build results, and
  2 | ## files generated by popular Visual Studio add-ons.
  3 | 
  4 | 3rd/
  5 | 
  6 | *.swp
  7 | *.dll
  8 | *.opendb
  9 | *.xlsx
 10 | 
 11 | # User-specific files
 12 | *.suo
 13 | *.user
 14 | *.userosscache
 15 | *.sln.docstates
 16 | 
 17 | # User-specific files (MonoDevelop/Xamarin Studio)
 18 | *.userprefs
 19 | 
 20 | # Build results
 21 | [Dd]ebug/
 22 | [Dd]ebugPublic/
 23 | [Rr]elease/
 24 | [Rr]eleases/
 25 | x64/
 26 | x86/
 27 | build/
 28 | bld/
 29 | [Bb]in/
 30 | [Oo]bj/
 31 | 
 32 | # Visual Studio 2015 cache/options directory
 33 | .vs/
 34 | # Uncomment if you have tasks that create the project's static files in wwwroot
 35 | #wwwroot/
 36 | 
 37 | # MSTest test Results
 38 | [Tt]est[Rr]esult*/
 39 | [Bb]uild[Ll]og.*
 40 | 
 41 | # NUNIT
 42 | *.VisualState.xml
 43 | TestResult.xml
 44 | 
 45 | # Build Results of an ATL Project
 46 | [Dd]ebugPS/
 47 | [Rr]eleasePS/
 48 | dlldata.c
 49 | 
 50 | # DNX
 51 | project.lock.json
 52 | artifacts/
 53 | 
 54 | *_i.c
 55 | *_p.c
 56 | *_i.h
 57 | *.ilk
 58 | *.meta
 59 | *.obj
 60 | *.pch
 61 | *.pdb
 62 | *.pgc
 63 | *.pgd
 64 | *.rsp
 65 | *.sbr
 66 | *.tlb
 67 | *.tli
 68 | *.tlh
 69 | *.tmp
 70 | *.tmp_proj
 71 | *.log
 72 | *.vspscc
 73 | *.vssscc
 74 | .builds
 75 | *.pidb
 76 | *.svclog
 77 | *.scc
 78 | 
 79 | # Chutzpah Test files
 80 | _Chutzpah*
 81 | 
 82 | # Visual C++ cache files
 83 | ipch/
 84 | *.aps
 85 | *.ncb
 86 | *.opensdf
 87 | *.sdf
 88 | *.cachefile
 89 | 
 90 | # Visual Studio profiler
 91 | *.psess
 92 | *.vsp
 93 | *.vspx
 94 | *.sap
 95 | 
 96 | # TFS 2012 Local Workspace
 97 | $tf/
 98 | 
 99 | # Guidance Automation Toolkit
100 | *.gpState
101 | 
102 | # ReSharper is a .NET coding add-in
103 | _ReSharper*/
104 | *.[Rr]e[Ss]harper
105 | *.DotSettings.user
106 | 
107 | # JustCode is a .NET coding add-in
108 | .JustCode
109 | 
110 | # TeamCity is a build add-in
111 | _TeamCity*
112 | 
113 | # DotCover is a Code Coverage Tool
114 | *.dotCover
115 | 
116 | # NCrunch
117 | _NCrunch_*
118 | .*crunch*.local.xml
119 | nCrunchTemp_*
120 | 
121 | # MightyMoose
122 | *.mm.*
123 | AutoTest.Net/
124 | 
125 | # Web workbench (sass)
126 | .sass-cache/
127 | 
128 | # Installshield output folder
129 | [Ee]xpress/
130 | 
131 | # DocProject is a documentation generator add-in
132 | DocProject/buildhelp/
133 | DocProject/Help/*.HxT
134 | DocProject/Help/*.HxC
135 | DocProject/Help/*.hhc
136 | DocProject/Help/*.hhk
137 | DocProject/Help/*.hhp
138 | DocProject/Help/Html2
139 | DocProject/Help/html
140 | 
141 | # Click-Once directory
142 | publish/
143 | 
144 | # Publish Web Output
145 | *.[Pp]ublish.xml
146 | *.azurePubxml
147 | # TODO: Comment the next line if you want to checkin your web deploy settings 
148 | # but database connection strings (with potential passwords) will be unencrypted
149 | *.pubxml
150 | *.publishproj
151 | 
152 | # NuGet Packages
153 | *.nupkg
154 | # The packages folder can be ignored because of Package Restore
155 | **/packages/*
156 | # except build/, which is used as an MSBuild target.
157 | !**/packages/build/
158 | # Uncomment if necessary however generally it will be regenerated when needed
159 | #!**/packages/repositories.config
160 | 
161 | # Windows Azure Build Output
162 | csx/
163 | *.build.csdef
164 | 
165 | # Windows Store app package directory
166 | AppPackages/
167 | 
168 | # Visual Studio cache files
169 | # files ending in .cache can be ignored
170 | *.[Cc]ache
171 | # but keep track of directories ending in .cache
172 | !*.[Cc]ache/
173 | 
174 | # Others
175 | ClientBin/
176 | [Ss]tyle[Cc]op.*
177 | ~$*
178 | *~
179 | *.dbmdl
180 | *.dbproj.schemaview
181 | *.pfx
182 | *.publishsettings
183 | node_modules/
184 | orleans.codegen.cs
185 | 
186 | # RIA/Silverlight projects
187 | Generated_Code/
188 | 
189 | # Backup & report files from converting an old project file
190 | # to a newer Visual Studio version. Backup files are not needed,
191 | # because we have git ;-)
192 | _UpgradeReport_Files/
193 | Backup*/
194 | UpgradeLog*.XML
195 | UpgradeLog*.htm
196 | 
197 | # SQL Server files
198 | *.mdf
199 | *.ldf
200 | 
201 | # Business Intelligence projects
202 | *.rdl.data
203 | *.bim.layout
204 | *.bim_*.settings
205 | 
206 | # Microsoft Fakes
207 | FakesAssemblies/
208 | 
209 | # Node.js Tools for Visual Studio
210 | .ntvs_analysis.dat
211 | 
212 | # Visual Studio 6 build log
213 | *.plg
214 | 
215 | # Visual Studio 6 workspace options file
216 | *.opt
217 | 
218 | # Visual Studio LightSwitch build output
219 | **/*.HTMLClient/GeneratedArtifacts
220 | **/*.DesktopClient/GeneratedArtifacts
221 | **/*.DesktopClient/ModelManifest.xml
222 | **/*.Server/GeneratedArtifacts
223 | **/*.Server/ModelManifest.xml
224 | _Pvt_Extensions
225 | 
226 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/modern_gpu/include/sparsematrix.h:
--------------------------------------------------------------------------------
  1 | /******************************************************************************
  2 |  * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
  3 |  * 
  4 |  * Redistribution and use in source and binary forms, with or without
  5 |  * modification, are permitted provided that the following conditions are met:
  6 |  *     * Redistributions of source code must retain the above copyright
  7 |  *       notice, this list of conditions and the following disclaimer.
  8 |  *     * Redistributions in binary form must reproduce the above copyright
  9 |  *       notice, this list of conditions and the following disclaimer in the
 10 |  *       documentation and/or other materials provided with the distribution.
 11 |  *     * Neither the name of the NVIDIA CORPORATION nor the
 12 |  *       names of its contributors may be used to endorse or promote products
 13 |  *       derived from this software without specific prior written permission.
 14 |  * 
 15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
 16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
 18 |  * ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
 19 |  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 20 |  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 23 |  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 24 |  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 25 |  *
 26 |  ******************************************************************************/
 27 | 
 28 | /******************************************************************************
 29 |  *
 30 |  * Code and text by Sean Baxter, NVIDIA Research
 31 |  * See http://nvlabs.github.io/moderngpu for repository and documentation.
 32 |  *
 33 |  ******************************************************************************/
 34 | 
 35 | #pragma once
 36 | 
 37 | #include "util/static.h"
 38 | 
 39 | namespace mgpu {
 40 | 
 41 | struct SparseMatrix {
 42 | 	int height, width, nz;
 43 | 	std::vector<int> csr;				// height
 44 | 	std::vector<int> cols;				// nz
 45 | 	std::vector<double> matrix;			// nz
 46 | };
 47 | 
 48 | bool ReadSparseMatrix(FILE* f, std::auto_ptr<SparseMatrix>* ppMatrix,
 49 | 	std::string& err);
 50 | 
 51 | bool ReadSparseMatrix(const char* filename,
 52 | 	std::auto_ptr<SparseMatrix>* ppMatrix, std::string& err);
 53 | 
 54 | bool LoadBinaryMatrix(const char* filename,
 55 | 	std::auto_ptr<SparseMatrix>* ppMatrix);
 56 | 
 57 | bool StoreBinaryMatrix(const char* filename, const SparseMatrix& matrix);
 58 | 
 59 | bool LoadCachedMatrix(const char* filename, 
 60 | 	std::auto_ptr<SparseMatrix>* ppMatrix, std::string& err);
 61 | 
 62 | // Multiply the matrix by a vector of 1s.
 63 | template<typename T>
 64 | void SpmvTest(const SparseMatrix& m, T* results) {
 65 | 	memset(results, 0, sizeof(T) * m.height);
 66 | 	for(int row = 0; row < m.height; ++row) {
 67 | 		T product = 0;
 68 | 		int begin = m.csr[row];
 69 | 		int end = (row + 1 < m.height) ? m.csr[row + 1] : m.nz;
 70 | 		for(int i = begin; i < end; ++i)
 71 | 			product += (T)m.matrix[i];
 72 | 
 73 | 		results[row] = product;
 74 | 	}		
 75 | }
 76 | 
 77 | template<typename T>
 78 | void CompareVecs(const T* test, const T* ref, int count) {
 79 | 	for(int i = 0; i < count; ++i) {
 80 | 		double x = ref[i];
 81 | 		double y = test[i];
 82 | 		double diff = fabs(x - y);
 83 | 
 84 | 		if(diff > 1.0e-5) {
 85 | 			if(y > 0) {
 86 | 				if(1.01 * x < y || 0.99 * x > y) {
 87 | 					printf("BAD OUTPUT AT COMPONENT %d: %8.5e vs %8.5e\n", i,
 88 | 						x, y);
 89 | 				//	exit(0);
 90 | 					return;
 91 | 				}
 92 | 			} else {
 93 | 				if(1.01 * x > y || 0.99 * x < y) {
 94 | 					printf("BAD OUTPUT AT COMPONENT %d: %8.5e vs %8.5e\n", i, 
 95 | 						x, y);
 96 | 				//	exit(0);
 97 | 					return;
 98 | 				}
 99 | 			}
100 | 		}
101 | 	}
102 | }
103 | 
// Summary of a matrix as returned by ComputeMatrixStats.
struct MatrixStats {
	// Dimensions and stored-element count.
	int height;
	int width;
	int nz;

	// Row density moments:
	double mean;
	double stddev;
	double skewness;
};
112 | 
113 | MatrixStats ComputeMatrixStats(const SparseMatrix& m);
114 | 
115 | int64 MulSparseMatrices(const SparseMatrix& A, const SparseMatrix& B,
116 | 	std::auto_ptr<SparseMatrix>* ppC);
117 | 
118 | 
119 | int64 ComputeProductCount(const SparseMatrix& A, const SparseMatrix& B);
120 | 
121 | void ComputeColRanges(const SparseMatrix& A, const SparseMatrix& B,
122 | 	int* colMin, int* colMax);
123 | 
124 | } // namespace mgpu
125 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/rapidxml_iterators.hpp:
--------------------------------------------------------------------------------
  1 | #ifndef RAPIDXML_ITERATORS_HPP_INCLUDED
  2 | #define RAPIDXML_ITERATORS_HPP_INCLUDED
  3 | 
  4 | // Copyright (C) 2006, 2009 Marcin Kalicinski
  5 | // Version 1.13
  6 | // Revision $DateTime: 2009/05/13 01:46:17 $
  7 | //! \file rapidxml_iterators.hpp This file contains rapidxml iterators
  8 | 
  9 | #include "rapidxml.hpp"
 10 | 
 11 | namespace rapidxml
 12 | {
 13 | 
 14 |     //! Iterator of child nodes of xml_node
 15 |     template<class Ch>
 16 |     class node_iterator
 17 |     {
 18 |     
 19 |     public:
 20 | 
 21 |         typedef typename xml_node<Ch> value_type;
 22 |         typedef typename xml_node<Ch> &reference;
 23 |         typedef typename xml_node<Ch> *pointer;
 24 |         typedef std::ptrdiff_t difference_type;
 25 |         typedef std::bidirectional_iterator_tag iterator_category;
 26 |         
 27 |         node_iterator()
 28 |             : m_node(0)
 29 |         {
 30 |         }
 31 | 
 32 |         node_iterator(xml_node<Ch> *node)
 33 |             : m_node(node->first_node())
 34 |         {
 35 |         }
 36 |         
 37 |         reference operator *() const
 38 |         {
 39 |             assert(m_node);
 40 |             return *m_node;
 41 |         }
 42 | 
 43 |         pointer operator->() const
 44 |         {
 45 |             assert(m_node);
 46 |             return m_node;
 47 |         }
 48 | 
 49 |         node_iterator& operator++()
 50 |         {
 51 |             assert(m_node);
 52 |             m_node = m_node->next_sibling();
 53 |             return *this;
 54 |         }
 55 | 
 56 |         node_iterator operator++(int)
 57 |         {
 58 |             node_iterator tmp = *this;
 59 |             ++this;
 60 |             return tmp;
 61 |         }
 62 | 
 63 |         node_iterator& operator--()
 64 |         {
 65 |             assert(m_node && m_node->previous_sibling());
 66 |             m_node = m_node->previous_sibling();
 67 |             return *this;
 68 |         }
 69 | 
 70 |         node_iterator operator--(int)
 71 |         {
 72 |             node_iterator tmp = *this;
 73 |             ++this;
 74 |             return tmp;
 75 |         }
 76 | 
 77 |         bool operator ==(const node_iterator<Ch> &rhs)
 78 |         {
 79 |             return m_node == rhs.m_node;
 80 |         }
 81 | 
 82 |         bool operator !=(const node_iterator<Ch> &rhs)
 83 |         {
 84 |             return m_node != rhs.m_node;
 85 |         }
 86 | 
 87 |     private:
 88 | 
 89 |         xml_node<Ch> *m_node;
 90 | 
 91 |     };
 92 | 
 93 |     //! Iterator of child attributes of xml_node
 94 |     template<class Ch>
 95 |     class attribute_iterator
 96 |     {
 97 |     
 98 |     public:
 99 | 
100 |         typedef typename xml_attribute<Ch> value_type;
101 |         typedef typename xml_attribute<Ch> &reference;
102 |         typedef typename xml_attribute<Ch> *pointer;
103 |         typedef std::ptrdiff_t difference_type;
104 |         typedef std::bidirectional_iterator_tag iterator_category;
105 |         
106 |         attribute_iterator()
107 |             : m_attribute(0)
108 |         {
109 |         }
110 | 
111 |         attribute_iterator(xml_node<Ch> *node)
112 |             : m_attribute(node->first_attribute())
113 |         {
114 |         }
115 |         
116 |         reference operator *() const
117 |         {
118 |             assert(m_attribute);
119 |             return *m_attribute;
120 |         }
121 | 
122 |         pointer operator->() const
123 |         {
124 |             assert(m_attribute);
125 |             return m_attribute;
126 |         }
127 | 
128 |         attribute_iterator& operator++()
129 |         {
130 |             assert(m_attribute);
131 |             m_attribute = m_attribute->next_attribute();
132 |             return *this;
133 |         }
134 | 
135 |         attribute_iterator operator++(int)
136 |         {
137 |             attribute_iterator tmp = *this;
138 |             ++this;
139 |             return tmp;
140 |         }
141 | 
142 |         attribute_iterator& operator--()
143 |         {
144 |             assert(m_attribute && m_attribute->previous_attribute());
145 |             m_attribute = m_attribute->previous_attribute();
146 |             return *this;
147 |         }
148 | 
149 |         attribute_iterator operator--(int)
150 |         {
151 |             attribute_iterator tmp = *this;
152 |             ++this;
153 |             return tmp;
154 |         }
155 | 
156 |         bool operator ==(const attribute_iterator<Ch> &rhs)
157 |         {
158 |             return m_attribute == rhs.m_attribute;
159 |         }
160 | 
161 |         bool operator !=(const attribute_iterator<Ch> &rhs)
162 |         {
163 |             return m_attribute != rhs.m_attribute;
164 |         }
165 | 
166 |     private:
167 | 
168 |         xml_attribute<Ch> *m_attribute;
169 | 
170 |     };
171 | 
172 | }
173 | 
174 | #endif
175 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/modern_gpu/include/mmio.h:
--------------------------------------------------------------------------------
  1 | /* 
  2 | *   Matrix Market I/O library for ANSI C
  3 | *
  4 | *   See http://math.nist.gov/MatrixMarket for details.
  5 | *
  6 | *
  7 | */
  8 | 
  9 | #ifndef MM_IO_H
 10 | #define MM_IO_H
 11 | 
 12 | #define MM_MAX_LINE_LENGTH 1025
 13 | #define MatrixMarketBanner "%%MatrixMarket"
 14 | #define MM_MAX_TOKEN_LENGTH 64
 15 | 
 16 | typedef char MM_typecode[4];
 17 | 
 18 | char *mm_typecode_to_str(MM_typecode matcode);
 19 | 
 20 | int mm_read_banner(FILE *f, MM_typecode *matcode);
 21 | int mm_read_mtx_crd_size(FILE *f, int *M, int *N, int *nz);
 22 | int mm_read_mtx_array_size(FILE *f, int *M, int *N);
 23 | 
 24 | int mm_write_banner(FILE *f, MM_typecode matcode);
 25 | int mm_write_mtx_crd_size(FILE *f, int M, int N, int nz);
 26 | int mm_write_mtx_array_size(FILE *f, int M, int N);
 27 | 
 28 | 
  29 | /********************* MM_typecode query functions ***************************/
 30 | 
 31 | #define mm_is_matrix(typecode)	((typecode)[0]=='M')
 32 | 
 33 | #define mm_is_sparse(typecode)	((typecode)[1]=='C')
 34 | #define mm_is_coordinate(typecode)((typecode)[1]=='C')
 35 | #define mm_is_dense(typecode)	((typecode)[1]=='A')
 36 | #define mm_is_array(typecode)	((typecode)[1]=='A')
 37 | 
 38 | #define mm_is_complex(typecode)	((typecode)[2]=='C')
 39 | #define mm_is_real(typecode)		((typecode)[2]=='R')
 40 | #define mm_is_pattern(typecode)	((typecode)[2]=='P')
 41 | #define mm_is_integer(typecode) ((typecode)[2]=='I')
 42 | 
 43 | #define mm_is_symmetric(typecode)((typecode)[3]=='S')
 44 | #define mm_is_general(typecode)	((typecode)[3]=='G')
 45 | #define mm_is_skew(typecode)	((typecode)[3]=='K')
 46 | #define mm_is_hermitian(typecode)((typecode)[3]=='H')
 47 | 
 48 | int mm_is_valid(MM_typecode matcode);		/* too complex for a macro */
 49 | 
 50 | 
  51 | /********************* MM_typecode modify functions ***************************/
 52 | 
 53 | #define mm_set_matrix(typecode)	((*typecode)[0]='M')
 54 | #define mm_set_coordinate(typecode)	((*typecode)[1]='C')
 55 | #define mm_set_array(typecode)	((*typecode)[1]='A')
 56 | #define mm_set_dense(typecode)	mm_set_array(typecode)
 57 | #define mm_set_sparse(typecode)	mm_set_coordinate(typecode)
 58 | 
 59 | #define mm_set_complex(typecode)((*typecode)[2]='C')
 60 | #define mm_set_real(typecode)	((*typecode)[2]='R')
 61 | #define mm_set_pattern(typecode)((*typecode)[2]='P')
 62 | #define mm_set_integer(typecode)((*typecode)[2]='I')
 63 | 
 64 | 
 65 | #define mm_set_symmetric(typecode)((*typecode)[3]='S')
 66 | #define mm_set_general(typecode)((*typecode)[3]='G')
 67 | #define mm_set_skew(typecode)	((*typecode)[3]='K')
 68 | #define mm_set_hermitian(typecode)((*typecode)[3]='H')
 69 | 
 70 | #define mm_clear_typecode(typecode) ((*typecode)[0]=(*typecode)[1]= \
 71 | 									(*typecode)[2]=' ',(*typecode)[3]='G')
 72 | 
 73 | #define mm_initialize_typecode(typecode) mm_clear_typecode(typecode)
 74 | 
 75 | 
 76 | /********************* Matrix Market error codes ***************************/
 77 | 
 78 | 
 79 | #define MM_COULD_NOT_READ_FILE	11
 80 | #define MM_PREMATURE_EOF		12
 81 | #define MM_NOT_MTX				13
 82 | #define MM_NO_HEADER			14
 83 | #define MM_UNSUPPORTED_TYPE		15
 84 | #define MM_LINE_TOO_LONG		16
 85 | #define MM_COULD_NOT_WRITE_FILE	17
 86 | 
 87 | 
 88 | /******************** Matrix Market internal definitions ********************
 89 | 
 90 |    MM_matrix_typecode: 4-character sequence
 91 | 
  92 | 				    object 		sparse/   	data        storage 
 93 | 						  		dense     	type        scheme
 94 | 
 95 |    string position:	 [0]        [1]			[2]         [3]
 96 | 
 97 |    Matrix typecode:  M(atrix)  C(oord)		R(eal)   	G(eneral)
  98 | 						        A(rray)		C(omplex)   H(ermitian)
 99 | 											P(attern)   S(ymmetric)
 100 | 								    		I(nteger)	K (skew)
101 | 
102 |  ***********************************************************************/
103 | 
/* Keyword strings for the type-code fields. */

/* Object. */
#define MM_MTX_STR         "matrix"

/* Layout: the ARRAY/DENSE and COORDINATE/SPARSE pairs share one spelling. */
#define MM_ARRAY_STR       "array"
#define MM_DENSE_STR       "array"
#define MM_COORDINATE_STR  "coordinate"
#define MM_SPARSE_STR      "coordinate"

/* Data type. */
#define MM_COMPLEX_STR     "complex"
#define MM_REAL_STR        "real"
#define MM_INT_STR         "integer"

/* Storage scheme. */
#define MM_GENERAL_STR     "general"
#define MM_SYMM_STR        "symmetric"
#define MM_HERM_STR        "hermitian"
#define MM_SKEW_STR        "skew-symmetric"

/* Data type (pattern). */
#define MM_PATTERN_STR     "pattern"
117 | 
118 | 
119 | /*  high level routines */
120 | 
121 | int mm_write_mtx_crd(char fname[], int M, int N, int nz, int I[], int J[],
122 | 		 double val[], MM_typecode matcode);
123 | int mm_read_mtx_crd_data(FILE *f, int M, int N, int nz, int I[], int J[],
124 | 		double val[], MM_typecode matcode);
125 | int mm_read_mtx_crd_entry(FILE *f, int *I, int *J, double *real, double *img,
126 | 			MM_typecode matcode);
127 | 
128 | int mm_read_unsymmetric_sparse(const char *fname, int *M_, int *N_, int *nz_,
129 |                 double **val_, int **I_, int **J_);
130 | 
131 | 
132 | 
133 | #endif
134 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/rasterizer/R_cut_A_mask_comb_scanline/ras_cut_mask_comb_scanline.h:
--------------------------------------------------------------------------------
  1 | 
  2 | #pragma once
  3 | 
  4 | #include "../shared/ras_base.h"
  5 | 
  6 | #include <map>
  7 | 
  8 | #include <cuda.h>
  9 | #include <host_defines.h>
 10 | 
 11 | #include "mochimazui/3rd/gl_4_5_compatibility.h"
 12 | #include <cuda_gl_interop.h>
 13 | #include "mochimazui/glpp.h"
 14 | #include "mochimazui/cuda_array.h"
 15 | 
 16 | namespace Mochimazui {
 17 | 
 18 | class VGContainer;
 19 | 
 20 | namespace Rasterizer_R_Cut_A_Mask_Comb_Scanline {
 21 | 
 22 | using GLPP::NamedBuffer;
 23 | using GLPP::NamedFramebuffer;
 24 | using GLPP::NamedTexture;
 25 | using GLPP::ShaderProgram;
 26 | 
 27 | using CUDATL::CUDAArray;
 28 | 
 29 | class VGRasterizer : public RasterizerBase::VGRasterizer {
 30 | 
 31 | 	typedef RasterizerBase::VGRasterizer _Base;
 32 | 
 33 | public:
 34 | 	VGRasterizer();
 35 | 	~VGRasterizer();
 36 | 
 37 | 	void init();
 38 | 	void uninit();
 39 | 
 40 | 	void addVg(const VGContainer &vgc);
 41 | 	void clear() {}
 42 | 
 43 | 	void setFragmentSize(int s) { _fragSize = s; }
 44 | 
 45 | 	void rasterizeImpl();
 46 | 
 47 | private:
 48 | 	void initProgram();
 49 | 	void initBuffer();
 50 | 	void initFramebuffer();
 51 | 
 52 | 	void initCommandList();
 53 | 	void uninitCommandList();
 54 | 
 55 | 	void onResize(int _width, int _height);
 56 | 
 57 | private:
 58 | 	void initQMMaskTable();
 59 | 
 60 | 	template <int FRAG_SIZE>
 61 | 	void rasterizeImpl();
 62 | 
 63 | protected:
 64 | 
 65 | 	uint32_t _fragSize = 2;
 66 | 
 67 | 	// for debug.
 68 | 	bool _dbgDumpWindingNumber = false;
 69 | 	bool _dbgDumpFragmentData = false;
 70 | 
 71 | 	struct _GL{
 72 | 		_GL() {}
 73 | 		struct _GL_Buffer{
 74 | 			_GL_Buffer() {}
 75 | 
 76 | 			NamedBuffer stencilDrawData;
 77 | 			NamedBuffer stencilDrawMask;
 78 | 
 79 | 			NamedBuffer outputIndex;
 80 | 			NamedBuffer outputFragmentData;
 81 | 			NamedBuffer outputSpanData;
 82 | 			NamedBuffer outputFillInfo;
 83 | 
 84 | 			NamedBuffer qm_output_stencil_mask;
 85 | 
 86 | 			// -- debug --
 87 | 			NamedBuffer dbgCurveVertex;
 88 | 			NamedBuffer dbgCurveColor;
 89 | 
 90 | 			NamedBuffer dbgDrawStencilDump_0;
 91 | 			NamedBuffer dbgDrawStencilDump_1;
 92 | 			NamedBuffer dbgDrawStencilDump_2;
 93 | 		} buffer;
 94 | 
 95 | 		struct _GL_Texture{
 96 | 			_GL_Texture() {}
 97 | 
 98 | 			// texbuffer
 99 | 			NamedTexture stencilDrawData;
100 | 			NamedTexture stencilDrawMask;
101 | 
102 | 			NamedTexture outputIndex;
103 | 			NamedTexture outputFragmentData;
104 | 			NamedTexture outputSpanData;
105 | 			NamedTexture outputFillInfo;
106 | 
107 | 			// tex2D
108 | 			NamedTexture stencilDraw;
109 | 
110 | 			// -- debug --
111 | 			NamedTexture dbgCurveVertex;
112 | 			NamedTexture dbgCurveColor;
113 | 
114 | 			NamedTexture dbgDrawCount;
115 | 
116 | 			NamedTexture dbgDrawStencilDump_0;
117 | 			NamedTexture dbgDrawStencilDump_1;
118 | 			NamedTexture dbgDrawStencilDump_2;
119 | 		} texture;
120 | 
121 | 		struct _GL_Framebuffer{
122 | 			_GL_Framebuffer() {}
123 | 			NamedFramebuffer stencilDrawMS;
124 | 		} framebuffer;
125 | 
126 | 		struct _GL_Program{
127 | 			_GL_Program() {}
128 | 
129 | 			ShaderProgram output;
130 | 
131 | 			// -- debug --
132 | 			ShaderProgram dbgCurve;
133 | 			ShaderProgram dbgCurveFragment;
134 | 			ShaderProgram dbgOutputScale;
135 | 
136 | 		} program;
137 | 
138 | 	} _gl;
139 | 
140 | 	struct _GPU_Array{
141 | 		_GPU_Array() {}
142 | 
143 | 		// transform && stroke to fill
144 | 		CUDAArray<float2> strokeTransformedVertex;
145 | 		CUDAArray<int> strokeToFillNewCurveTemp;
146 | 		
147 | 		CUDAArray<float2> transformedVertex;
148 | 		
149 | 		// monotonize
150 | 		CUDAArray<int> curve_pixel_count;
151 | 		CUDAArray<float> monotonic_cutpoint_cache;		
152 | 		CUDAArray<float> intersection;
153 | 
154 | 		CUDAArray<float> monoCurveT;
155 | 		CUDAArray<uint32_t> monoCurveNumber;
156 | 		CUDAArray<uint32_t> monoCurveSize;
157 | 		CUDAArray<uint32_t> curveFragmentNumber;
158 | 
159 | 		CUDAArray<int32_t> ic4Context;
160 | 
161 | 		CUDAArray<int32_t> fragmentData;
162 | 
163 | 		// mask
164 | 		CUDAArray<uint32_t> amaskTable;
165 | 		CUDAArray<uint32_t> pmaskTable;
166 | 
167 | 		// temp for CUDA SM gen stencil
168 | 		CUDAArray<int32_t> blockBoundaryBins;
169 | 
170 | 		// for CUDA cell list output
171 | 		CUDAArray<int32_t> cellListPos;
172 | 		CUDAArray<int32_t> cellListFillInfo;
173 | 		CUDAArray<int32_t> cellListMaskIndex;
174 | 
175 | 	} _gpu;
176 | 
177 | 	struct __CUDA {
178 | 		__CUDA() {}
179 | 		struct __CUDAResrouce {
180 | 			__CUDAResrouce() :
181 | 				stencilDrawData(nullptr), stencilDrawMask(nullptr),
182 | 				outputIndex(nullptr), outputFragment(nullptr),
183 | 				outputSpan(nullptr), outputFillInfo(nullptr)
184 | 			{}
185 | 
186 | 			cudaGraphicsResource *stencilDrawData = nullptr;
187 | 			cudaGraphicsResource *stencilDrawMask = nullptr;
188 | 
189 | 			cudaGraphicsResource *outputIndex = nullptr;
190 | 			cudaGraphicsResource *outputFragment = nullptr;
191 | 			cudaGraphicsResource *outputSpan = nullptr;
192 | 			cudaGraphicsResource *outputFillInfo = nullptr;
193 | 
194 | 			cudaGraphicsResource *qm_output_stencil_mask = nullptr;
195 | 		} resource;
196 | 	} _cuda;
197 | 
198 | 	CUDAArray<int> _qm_mask_table_pixel8;
199 | 	CUDAArray<int4> _qm_mask_table_pixel32;
200 | 
201 | 	CUDAArray<float2> _sample_position;
202 | };
203 | 
204 | } // end of namespace BigFragAM
205 | 
206 | } // end of namespace Mochimazui
207 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/modern_gpu/include/kernels/localitysort.cuh:
--------------------------------------------------------------------------------
  1 | /******************************************************************************
  2 |  * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
  3 |  * 
  4 |  * Redistribution and use in source and binary forms, with or without
  5 |  * modification, are permitted provided that the following conditions are met:
  6 |  *     * Redistributions of source code must retain the above copyright
  7 |  *       notice, this list of conditions and the following disclaimer.
  8 |  *     * Redistributions in binary form must reproduce the above copyright
  9 |  *       notice, this list of conditions and the following disclaimer in the
 10 |  *       documentation and/or other materials provided with the distribution.
 11 |  *     * Neither the name of the NVIDIA CORPORATION nor the
 12 |  *       names of its contributors may be used to endorse or promote products
 13 |  *       derived from this software without specific prior written permission.
 14 |  * 
 15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
 16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
 18 |  * ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
 19 |  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 20 |  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 23 |  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 24 |  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 25 |  *
 26 |  ******************************************************************************/
 27 | 
 28 | /******************************************************************************
 29 |  *
 30 |  * Code and text by Sean Baxter, NVIDIA Research
 31 |  * See http://nvlabs.github.io/moderngpu for repository and documentation.
 32 |  *
 33 |  ******************************************************************************/
 34 | 
 35 | #pragma once
 36 | 
 37 | #include "../mgpuhost.cuh"
 38 | #include "../kernels/mergesort.cuh"
 39 | #include "../kernels/segmentedsort.cuh"
 40 | 
 41 | namespace mgpu {
 42 | 
 43 | template<typename T, typename Comp>
 44 | MGPU_HOST void LocalitySortKeys(T* data_global, int count, CudaContext& context,
 45 | 	Comp comp, bool verbose) {
 46 | 
 47 | 	const int NT = 128;
 48 | 	const int VT = 11;
 49 | 	typedef LaunchBoxVT<NT, VT> Tuning;
 50 | 	int2 launch = Tuning::GetLaunchParams(context);
 51 | 	const int NV = launch.x * launch.y;
 52 | 
 53 | 	int numBlocks = MGPU_DIV_UP(count, NV);
 54 | 	int numPasses = FindLog2(numBlocks, true);
 55 | 
 56 | 	SegSortSupport support;
 57 | 	MGPU_MEM(byte) mem = AllocSegSortBuffers(count, NV, support, false,
 58 | 		context);
 59 | 	
 60 | 	MGPU_MEM(T) destDevice = context.Malloc<T>(count);
 61 | 	T* source = data_global;
 62 | 	T* dest = destDevice->get(); 
 63 | 	
 64 | 	KernelBlocksort<Tuning, false>
 65 | 		<<<numBlocks, launch.x, 0, context.Stream()>>>(source, (const int*)0,
 66 | 		count, (1 & numPasses) ? dest : source, (int*)0, comp);
 67 | 	MGPU_SYNC_CHECK("KernelBlocksort");
 68 | 
 69 | 	if(1 & numPasses) std::swap(source, dest);
 70 | 
 71 | 	SegSortPasses<Tuning, false, false>(support, source, (int*)0, count, 
 72 | 		numBlocks, numPasses, dest, (int*)0, comp, context, verbose);
 73 | } 
 74 | template<typename T>
 75 | MGPU_HOST void LocalitySortKeys(T* data_global, int count, CudaContext& context,
 76 | 	bool verbose) {
 77 | 	LocalitySortKeys(data_global, count, context, mgpu::less<T>(), verbose);
 78 | }
 79 | 
 80 | template<typename KeyType, typename ValType, typename Comp>
 81 | MGPU_HOST void LocalitySortPairs(KeyType* keys_global, ValType* values_global,
 82 | 	int count, CudaContext& context, Comp comp, bool verbose) {
 83 | 
 84 | 	const int NT = 128;
 85 | 	const int VT = 7;
 86 | 	typedef LaunchBoxVT<NT, VT> Tuning;
 87 | 	int2 launch = Tuning::GetLaunchParams(context);
 88 | 	const int NV = launch.x * launch.y;
 89 | 
 90 | 	int numBlocks = MGPU_DIV_UP(count, NV);
 91 | 	int numPasses = FindLog2(numBlocks, true);
 92 | 
 93 | 	SegSortSupport support;
 94 | 	MGPU_MEM(byte) mem = AllocSegSortBuffers(count, NV, support, false,
 95 | 		context);
 96 | 	
 97 | 	MGPU_MEM(KeyType) keysDestDevice = context.Malloc<KeyType>(count);
 98 | 	MGPU_MEM(ValType) valsDestDevice = context.Malloc<ValType>(count);
 99 | 
100 | 	KeyType* keysSource = keys_global;
101 | 	KeyType* keysDest = keysDestDevice->get();
102 | 	ValType* valsSource = values_global;
103 | 	ValType* valsDest = valsDestDevice->get();
104 | 
105 | 	KernelBlocksort<Tuning, true><<<numBlocks, launch.x, 0, context.Stream()>>>(
106 | 		keysSource, valsSource, count, (1 & numPasses) ? keysDest : keysSource,
107 | 		(1 & numPasses) ? valsDest : valsSource, comp);
108 | 	MGPU_SYNC_CHECK("KernelBlocksort");
109 | 
110 | 	if(1 & numPasses) {
111 | 		std::swap(keysSource, keysDest);
112 | 		std::swap(valsSource, valsDest);
113 | 	}
114 | 
115 | 	SegSortPasses<Tuning, false, true>(support, keysSource, valsSource, count,
116 | 		numBlocks, numPasses, keysDest, valsDest, comp, context, verbose);
117 | } 
// Convenience overload: sorts the pairs using mgpu::less<KeyType> as the key
// comparator.
template<typename KeyType, typename ValType>
MGPU_HOST void LocalitySortPairs(KeyType* keys_global, ValType* values_global,
	int count, CudaContext& context, bool verbose) {
	mgpu::less<KeyType> defaultComp;
	LocalitySortPairs(keys_global, values_global, count, context, defaultComp,
		verbose);
}
124 | 
125 | } // namespace mgpu
126 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/modern_gpu/include/util/format.h:
--------------------------------------------------------------------------------
  1 | /******************************************************************************
  2 |  * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
  3 |  * 
  4 |  * Redistribution and use in source and binary forms, with or without
  5 |  * modification, are permitted provided that the following conditions are met:
  6 |  *     * Redistributions of source code must retain the above copyright
  7 |  *       notice, this list of conditions and the following disclaimer.
  8 |  *     * Redistributions in binary form must reproduce the above copyright
  9 |  *       notice, this list of conditions and the following disclaimer in the
 10 |  *       documentation and/or other materials provided with the distribution.
 11 |  *     * Neither the name of the NVIDIA CORPORATION nor the
 12 |  *       names of its contributors may be used to endorse or promote products
 13 |  *       derived from this software without specific prior written permission.
 14 |  * 
 15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
 16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
 18 |  * ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
 19 |  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 20 |  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 23 |  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 24 |  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 25 |  *
 26 |  ******************************************************************************/
 27 | 
 28 | /******************************************************************************
 29 |  *
 30 |  * Code and text by Sean Baxter, NVIDIA Research
 31 |  * See http://nvlabs.github.io/moderngpu for repository and documentation.
 32 |  *
 33 |  ******************************************************************************/
 34 | 
 35 | #pragma once
 36 | 
 37 | #include "static.h"
 38 | #include <vector>
 39 | #include <cstdio>
 40 | #include <algorithm>
 41 | #include <string>
 42 | 
 43 | namespace mgpu {
 44 | 
 45 | // Like sprintf but dynamically allocates sufficient output to hold the entire
 46 | // text.
 47 | std::string stringprintf(const char* format, ...);
 48 | 
 49 | // Returns xxx.xx(K|M|B)
 50 | std::string FormatInteger(int64 x);
 51 | 
 52 | const char* TypeIdString(const std::type_info& ti);
 53 | 
 54 | template<typename T>
 55 | const char* TypeIdName() {
 56 | 	return TypeIdString(typeid(T));
 57 | }
 58 | 
 59 | struct FormatOpPrintf {
 60 | 	const char* format;
 61 | 	FormatOpPrintf(const char* f) : format(f) { }
 62 | 
 63 | 	template<typename T>
 64 | 	std::string operator()(int index, T x) const {
 65 | 		return stringprintf(format, x);
 66 | 	}
 67 | };
 68 | 
 69 | struct FormatOpMaskBit {
 70 | 	const char* format;
 71 | 	FormatOpMaskBit(const char* f) : format(f) { }
 72 | 
 73 | 	std::string operator()(int index, int x) const {
 74 | 		return stringprintf(format, (0x80000000 & x) ? '*' : ' ', 
 75 | 			0x7fffffff & x);
 76 | 	}
 77 | };
 78 | 
 79 | struct FormatOpMarkArray {
 80 | 	const char* format;
 81 | 	const int* marks;
 82 | 	int numMarks;
 83 | 
 84 | 	FormatOpMarkArray(const char* f, const int* m, int n) :
 85 | 		format(f), marks(m), numMarks(n) { }
 86 | 
 87 | 	std::string operator()(int index, int x) const {
 88 | 		// Search for index in the array of marks.
 89 | 		bool mark = std::binary_search(marks, marks + numMarks, index);
 90 | 		return stringprintf(format, mark ? '*' : ' ', x);
 91 | 	}
 92 | };
 93 | 
 94 | template<typename T, typename Op>
 95 | std::string FormatArrayOp(const T* data, size_t count, Op op, int numCols) {
 96 | 	std::string s;
 97 | 	size_t numRows = MGPU_DIV_UP(count, numCols);
 98 | 	for(size_t row(0); row < numRows; ++row) { 
 99 | 		size_t left = row * numCols;
100 | 		s.append(stringprintf("%5d:  ", left));
101 | 
102 | 		for(size_t col(left); col < std::min(left + numCols, count); ++col) {
103 | 			s.append(op(col, data[col]));
104 | 			s.push_back(' ');
105 | 		}
106 | 		s.push_back('\n');
107 | 	}
108 | 	return s;
109 | }
110 | 
111 | template<typename T>
112 | std::string FormatArray(const T* data, size_t count, const char* format, 
113 | 	int numCols) {
114 | 	return FormatArrayOp(data, count, FormatOpPrintf(format), numCols);
115 | }
116 | 
// Formats every element of data with numCols elements per row, rendering each
// element through the printf-style conversion in format.
// An empty vector yields an empty string.
template<typename T>
std::string FormatArray(const std::vector<T>& data, const char* format, 
	int numCols) {
	// &data[0] is undefined for an empty vector; pass a null pointer instead
	// (it is never dereferenced when the count is zero). The size is forwarded
	// as size_t rather than being truncated through int.
	const T* first = data.empty() ? (const T*)0 : &data[0];
	return FormatArray(first, data.size(), format, numCols);
}
// Formats every element of data with numCols elements per row, rendering each
// element with op(index, value). An empty vector yields an empty string.
template<typename T, typename Op>
std::string FormatArrayOp(const std::vector<T>& data, Op op, int numCols) {
	// &data[0] is undefined for an empty vector; pass a null pointer instead
	// (it is never dereferenced when the count is zero). The size is forwarded
	// as size_t rather than being truncated through int.
	const T* first = data.empty() ? (const T*)0 : &data[0];
	return FormatArrayOp(first, data.size(), op, numCols);
}
126 | 
// Writes FormatArray(data, count, format, numCols) to standard output.
template<typename T>
void PrintArray(const T* data, size_t count, const char* format, int numCols) {
	printf("%s", FormatArray(data, count, format, numCols).c_str());
}
132 | 
// Writes FormatArray(data, format, numCols) to standard output.
template<typename T>
void PrintArray(const std::vector<T>& data, const char* format, int numCols) {
	printf("%s", FormatArray(data, format, numCols).c_str());
}
138 | 
// Writes FormatArrayOp(data, op, numCols) to standard output.
template<typename T, typename Op>
void PrintArrayOp(const std::vector<T>& data, Op op, int numCols) {
	printf("%s", FormatArrayOp(data, op, numCols).c_str());
}
144 | 
145 | 
146 | 
147 | 
148 | } // namespace mgpu
149 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/modern_gpu/include/device/deviceutil.cuh:
--------------------------------------------------------------------------------
  1 | /******************************************************************************
  2 |  * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
  3 |  * 
  4 |  * Redistribution and use in source and binary forms, with or without
  5 |  * modification, are permitted provided that the following conditions are met:
  6 |  *     * Redistributions of source code must retain the above copyright
  7 |  *       notice, this list of conditions and the following disclaimer.
  8 |  *     * Redistributions in binary form must reproduce the above copyright
  9 |  *       notice, this list of conditions and the following disclaimer in the
 10 |  *       documentation and/or other materials provided with the distribution.
 11 |  *     * Neither the name of the NVIDIA CORPORATION nor the
 12 |  *       names of its contributors may be used to endorse or promote products
 13 |  *       derived from this software without specific prior written permission.
 14 |  * 
 15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
 16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
 18 |  * ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
 19 |  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 20 |  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 23 |  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 24 |  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 25 |  *
 26 |  ******************************************************************************/
 27 | 
 28 | /******************************************************************************
 29 |  *
 30 |  * Code and text by Sean Baxter, NVIDIA Research
 31 |  * See http://nvlabs.github.io/moderngpu for repository and documentation.
 32 |  *
 33 |  ******************************************************************************/
 34 | 
 35 | #pragma once
 36 | 
 37 | #include "../device/intrinsics.cuh"
 38 | 
 39 | namespace mgpu {
 40 | 
 41 | // Get the difference between two pointers in bytes.
 42 | MGPU_HOST_DEVICE ptrdiff_t PtrDiff(const void* a, const void* b) {
 43 | 	return (const byte*)b - (const byte*)a;
 44 | }
 45 | 
 46 | // Offset a pointer by i bytes.
 47 | template<typename T> 
 48 | MGPU_HOST_DEVICE const T* PtrOffset(const T* p, ptrdiff_t i) {
 49 | 	return (const T*)((const byte*)p + i);
 50 | }
 51 | template<typename T>
 52 | MGPU_HOST_DEVICE T* PtrOffset(T* p, ptrdiff_t i) {
 53 | 	return (T*)((byte*)p + i);
 54 | }
 55 | 
 56 | ////////////////////////////////////////////////////////////////////////////////
 57 | // Task range support
 58 | // Evenly distributes variable-length arrays over a fixed number of CTAs.
 59 | 
 60 | MGPU_HOST int2 DivideTaskRange(int numItems, int numWorkers) {
 61 | 	div_t d = div(numItems, numWorkers);
 62 | 	return make_int2(d.quot, d.rem);
 63 | }
 64 | 
 65 | MGPU_HOST_DEVICE int2 ComputeTaskRange(int block, int2 task) {
 66 | 	int2 range;
 67 | 	range.x = task.x * block;
 68 | 	range.x += min(block, task.y);
 69 | 	range.y = range.x + task.x + (block < task.y);
 70 | 	return range;
 71 | }
 72 | 
 73 | MGPU_HOST_DEVICE int2 ComputeTaskRange(int block, int2 task, int blockSize, 
 74 | 	int count) {
 75 | 	int2 range = ComputeTaskRange(block, task);
 76 | 	range.x *= blockSize;
 77 | 	range.y = min(count, range.y * blockSize);
 78 | 	return range;
 79 | }
 80 | 
 81 | ////////////////////////////////////////////////////////////////////////////////
 82 | // DeviceExtractHeadFlags
 83 | // Input array flags is a bit array with 32 head flags per word.
 84 | // ExtractThreadHeadFlags returns numBits flags starting at bit index.
 85 | 
 86 | MGPU_HOST_DEVICE uint DeviceExtractHeadFlags(const uint* flags, int index, 
 87 | 	int numBits) {
 88 | 
 89 | 	int index2 = index>> 5;
 90 | 	int shift = 31 & index;
 91 | 	uint headFlags = flags[index2]>> shift;
 92 | 	int shifted = 32 - shift;
 93 | 
 94 | 	if(shifted < numBits)
 95 | 		// We also need to shift in the next set of bits.
 96 | 		headFlags = bfi(flags[index2 + 1], headFlags, shifted, shift);
 97 | 	headFlags &= (1<< numBits) - 1;
 98 | 	return headFlags;
 99 | }
100 | 
101 | ////////////////////////////////////////////////////////////////////////////////
102 | // DevicePackHeadFlags
103 | // Pack VT bits per thread at 32 bits/thread. Will consume an integer number of
104 | // words, because CTA size is a multiple of 32. The first NT * VT / 32 threads
105 | // return packed words.
106 | 
template<int NT, int VT>
MGPU_DEVICE uint DevicePackHeadFlags(uint threadBits, int tid, 
	uint* flags_shared) {

	// Number of 32-bit words produced by the whole CTA.
	const int WordCount = NT * VT / 32;

	// Publish every thread's bits so that other threads can gather them.
	flags_shared[tid] = threadBits;
	__syncthreads();

	uint packed = 0;
	if(tid < WordCount) {
		// Upper bound on the number of additional thread entries that can
		// contribute to one output word.
		const int Items = MGPU_DIV_UP(32, VT);

		// Locate the thread entry containing the first bit of this word and
		// the offset of that bit inside the entry.
		const int wordStart = 32 * tid;
		int source = wordStart / VT;
		const int skip = wordStart - VT * source;

		// Seed the word with the tail of the first contributing entry.
		packed = flags_shared[source]>> skip;
		int filled = VT - skip;
		++source;
		
		// Append following entries until 32 bits have been gathered.
		#pragma unroll
		for(int i = 0; i < Items; ++i) {
			if(i < Items - 1 || filled < 32) {
				uint next = flags_shared[source + i];
				if(filled < 32) packed |= next<< filled;
				filled += VT;
			}
		}
	}
	// Keep flags_shared intact until every gathering thread has finished.
	__syncthreads();

	return packed;
}
142 | 
143 | } // namespace mgpu
144 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/modern_gpu/include/device/ctasegscan.cuh:
--------------------------------------------------------------------------------
  1 | /******************************************************************************
  2 |  * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
  3 |  * 
  4 |  * Redistribution and use in source and binary forms, with or without
  5 |  * modification, are permitted provided that the following conditions are met:
  6 |  *     * Redistributions of source code must retain the above copyright
  7 |  *       notice, this list of conditions and the following disclaimer.
  8 |  *     * Redistributions in binary form must reproduce the above copyright
  9 |  *       notice, this list of conditions and the following disclaimer in the
 10 |  *       documentation and/or other materials provided with the distribution.
 11 |  *     * Neither the name of the NVIDIA CORPORATION nor the
 12 |  *       names of its contributors may be used to endorse or promote products
 13 |  *       derived from this software without specific prior written permission.
 14 |  * 
 15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
 16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
 18 |  * ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
 19 |  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 20 |  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 23 |  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 24 |  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 25 |  *
 26 |  ******************************************************************************/
 27 | 
 28 | /******************************************************************************
 29 |  *
 30 |  * Code and text by Sean Baxter, NVIDIA Research
 31 |  * See http://nvlabs.github.io/moderngpu for repository and documentation.
 32 |  *
 33 |  ******************************************************************************/
 34 | 
 35 | #pragma once
 36 | 
 37 | #include "../device/ctascan.cuh"
 38 | 
 39 | namespace mgpu {
 40 | 
 41 | ////////////////////////////////////////////////////////////////////////////////
 42 | // DeviceFindSegScanDelta
 43 | // Runs an inclusive max-index scan over binary inputs.
 44 | 
 45 | template<int NT>
 46 | MGPU_DEVICE int DeviceFindSegScanDelta(int tid, bool flag, int* delta_shared) {
 47 | 	const int NumWarps = NT / 32;
 48 | 
 49 | 	int warp = tid / 32;
 50 | 	int lane = 31 & tid;
 51 | 	uint warpMask = 0xffffffff>> (31 - lane);		// inclusive search
 52 | 	uint ctaMask = 0x7fffffff>> (31 - lane);		// exclusive search
 53 | 
 54 | 	uint warpBits = __ballot(flag);
 55 | 	delta_shared[warp] = warpBits;
 56 | 	__syncthreads();
 57 | 
 58 | 	if(tid < NumWarps) {
 59 | 		uint ctaBits = __ballot(0 != delta_shared[tid]);
 60 | 		int warpSegment = 31 - clz(ctaMask & ctaBits);
 61 | 		int start = (-1 != warpSegment) ? 
 62 | 			(31 - clz(delta_shared[warpSegment]) + 32 * warpSegment) : 0;
 63 | 		delta_shared[NumWarps + tid] = start;
 64 | 	}
 65 | 	__syncthreads();
 66 | 
 67 | 	// Find the closest flag to the left of this thread within the warp.
 68 | 	// Include the flag for this thread.
 69 | 	int start = 31 - clz(warpMask & warpBits);
 70 | 	if(-1 != start) start += ~31 & tid;
 71 | 	else start = delta_shared[NumWarps + warp];
 72 | 	__syncthreads();
 73 | 
 74 | 	return tid - start;
 75 | }
 76 |   
 77 | ////////////////////////////////////////////////////////////////////////////////
 78 | // CTASegScan
 79 | 
 80 | template<int NT, typename _Op = mgpu::plus<int> >
 81 | struct CTASegScan {
 82 | 	typedef _Op Op;
 83 | 	typedef typename Op::result_type T;
 84 | 	enum { NumWarps = NT / 32, Size = NT, Capacity = 2 * NT };
 85 | 	union Storage {
 86 | 		int delta[NumWarps];
 87 | 		T values[Capacity];
 88 | 	};
 89 | 
 90 | 	// Each thread passes the reduction of the LAST SEGMENT that it covers.
 91 | 	// flag is set to true if there's at least one segment flag in the thread.
 92 | 	// SegScan returns the reduction of values for the first segment in this
 93 | 	// thread over the preceding threads.
 94 | 	// Return the value init for the first thread.
 95 | 
 96 | 	// When scanning single elements per thread, interpret the flag as a BEGIN
 97 | 	// FLAG. If tid's flag is set, its value belongs to thread tid + 1, not 
 98 | 	// thread tid.
 99 | 
100 | 	// The function returns the reduction of the last segment in the CTA.
101 | 
102 | 	MGPU_DEVICE static T SegScanDelta(int tid, int tidDelta, T x, 
103 | 		Storage& storage, T* carryOut, T identity = (T)0, Op op = Op()) {
104 | 
105 | 		// Run an inclusive scan 
106 | 		int first = 0;
107 | 		storage.values[first + tid] = x;
108 | 		__syncthreads();
109 | 
110 | 		#pragma unroll
111 | 		for(int offset = 1; offset < NT; offset += offset) {
112 | 			if(tidDelta >= offset) 
113 | 				x = op(storage.values[first + tid - offset], x);
114 | 			first = NT - first;
115 | 			storage.values[first + tid] = x;
116 | 			__syncthreads();
117 | 		}
118 | 
119 | 		// Get the exclusive scan.
120 | 		x = tid ? storage.values[first + tid - 1] : identity;
121 | 		*carryOut = storage.values[first + NT - 1];
122 | 		__syncthreads();
123 | 		return x;
124 | 	}
125 | 
126 | 	MGPU_DEVICE static T SegScan(int tid, T x, bool flag, Storage& storage,
127 | 		T* carryOut, T identity = (T)0, Op op = Op()) {
128 | 
129 | 		// Find the left-most thread that covers the first segment of this 
130 | 		// thread.
131 | 		int tidDelta = DeviceFindSegScanDelta<NT>(tid, flag, storage.delta);
132 | 
133 | 		return SegScanDelta(tid, tidDelta, x, storage, carryOut, identity, op);
134 | 	}
135 | };
136 | 
137 | } // namespace mgpu
138 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/modern_gpu/include/device/ctaloadbalance.cuh:
--------------------------------------------------------------------------------
  1 | /******************************************************************************
  2 |  * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
  3 |  * 
  4 |  * Redistribution and use in source and binary forms, with or without
  5 |  * modification, are permitted provided that the following conditions are met:
  6 |  *     * Redistributions of source code must retain the above copyright
  7 |  *       notice, this list of conditions and the following disclaimer.
  8 |  *     * Redistributions in binary form must reproduce the above copyright
  9 |  *       notice, this list of conditions and the following disclaimer in the
 10 |  *       documentation and/or other materials provided with the distribution.
 11 |  *     * Neither the name of the NVIDIA CORPORATION nor the
 12 |  *       names of its contributors may be used to endorse or promote products
 13 |  *       derived from this software without specific prior written permission.
 14 |  * 
 15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
 16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
 18 |  * ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
 19 |  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 20 |  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 23 |  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 24 |  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 25 |  *
 26 |  ******************************************************************************/
 27 | 
 28 | /******************************************************************************
 29 |  *
 30 |  * Code and text by Sean Baxter, NVIDIA Research
 31 |  * See http://nvlabs.github.io/moderngpu for repository and documentation.
 32 |  *
 33 |  ******************************************************************************/
 34 | 
 35 | #pragma once
 36 | 
 37 | #include "../device/ctasearch.cuh"
 38 | #include "../device/loadstore.cuh"
 39 | 
 40 | namespace mgpu {
 41 | 
 42 | ////////////////////////////////////////////////////////////////////////////////
 43 | // DeviceLoadBalancingSearch
 44 | // Upper Bound search from A (needles) into B (haystack). The A values are 
 45 | // natural numbers from aBegin to aEnd. bFirst is the index of the B value at
 46 | // bBegin in shared memory.
 47 | 
 48 | template<int VT, bool RangeCheck>
 49 | MGPU_DEVICE void DeviceSerialLoadBalanceSearch(const int* b_shared, int aBegin,
 50 | 	int aEnd, int bFirst, int bBegin, int bEnd, int* a_shared) {
 51 | 
 52 | 	int bKey = b_shared[bBegin];
 53 | 
 54 | 	#pragma unroll
 55 | 	for(int i = 0; i < VT; ++i) {
 56 | 		bool p;
 57 | 		if(RangeCheck) 
 58 | 			p = (aBegin < aEnd) && ((bBegin >= bEnd) || (aBegin < bKey));
 59 | 		else
 60 | 			p = aBegin < bKey;
 61 | 
 62 | 		if(p)
 63 | 			// Advance A (the needle).
 64 | 			a_shared[aBegin++] = bFirst + bBegin;
 65 | 		else
 66 | 			// Advance B (the haystack).
 67 | 			bKey = b_shared[++bBegin];
 68 | 	}
 69 | }
 70 | 
 71 | ////////////////////////////////////////////////////////////////////////////////
 72 | // CTALoadBalance
 73 | // Computes upper_bound(counting_iterator<int>(first), b_global) - 1.
 74 | 
 75 | // Unlike most other CTA* functions, CTALoadBalance loads from global memory.
 76 | // This returns the loaded B elements at the beginning or end of shared memory
 77 | // depending on the aFirst argument. 
 78 | 
 79 | // CTALoadBalance requires NT * VT + 2 slots of shared memory.
 80 | template<int NT, int VT, typename InputIt>
 81 | MGPU_DEVICE int4 CTALoadBalance(int destCount, InputIt b_global, 
 82 | 	int sourceCount, int block, int tid, const int* mp_global, 
 83 | 	int* indices_shared, bool loadPrecedingB) {
 84 | 		    
 85 | 	int4 range = ComputeMergeRange(destCount, sourceCount, block, 0, NT * VT, 
 86 | 		mp_global);
 87 | 
 88 | 	int a0 = range.x;
 89 | 	int a1 = range.y;
 90 | 	int b0 = range.z;
 91 | 	int b1 = range.w;
 92 | 	if(!b0) loadPrecedingB = false;
 93 | 
 94 | 	// Load one trailing term from B. If we're already at the end, fill the 
 95 | 	// end of the buffer with destCount.
 96 | 	int aCount = a1 - a0;
 97 | 	int bCount = b1 - b0;
 98 | 	int extended = b1 < sourceCount;
 99 | 	int loadCount = bCount + extended;
100 | 	int fillCount = NT * VT + 1 - loadCount - aCount;
101 | 
102 | 	int* a_shared = indices_shared;
103 | 	int* b_shared = indices_shared + aCount + (int)loadPrecedingB;
104 | 
105 | 	// Load the B values.
106 | //	DeviceMemToMemLoop<NT>(bCount + extended + (int)loadPrecedingB, 
107 | //		b_global + b0 - (int)loadPrecedingB, tid, 
108 | //		b_shared - (int)loadPrecedingB);
109 | 
110 | 	for(int i = tid - (int)loadPrecedingB; i < bCount + extended; i += NT)
111 | 		b_shared[i] = b_global[b0 + i];
112 | 
113 | 	// Fill the end of the array with destCount.
114 | 	for(int i = tid + extended; i < fillCount; i += NT)
115 | 		b_shared[bCount + i] = destCount;
116 | 	__syncthreads();
117 | 
118 | 	// Run a merge path to find the start of the serial merge for each thread.
119 | 	int diag = VT * tid;
120 | 	int mp = MergePath<MgpuBoundsUpper>(mgpu::counting_iterator<int>(a0),
121 | 		aCount, b_shared, bCount, diag, mgpu::less<int>());
122 | 
123 | 	int a0tid = a0 + mp;
124 | 	int b0tid = diag - mp;
125 | 	
126 | 	// Subtract 1 from b0 because we want to return upper_bound - 1.
127 | 	DeviceSerialLoadBalanceSearch<VT, false>(b_shared, a0tid, a1, b0 - 1,
128 | 		b0tid, bCount, a_shared - a0);
129 | 	__syncthreads();
130 | 	
131 | 	b0 -= (int)loadPrecedingB;
132 | 	return make_int4(a0, a1, b0, b1);
133 | }
134 | 
135 | 
136 | } // namespace mgpu
137 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/modern_gpu/include/kernels/reduce.cuh:
--------------------------------------------------------------------------------
  1 | /******************************************************************************
  2 |  * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
  3 |  * 
  4 |  * Redistribution and use in source and binary forms, with or without
  5 |  * modification, are permitted provided that the following conditions are met:
  6 |  *     * Redistributions of source code must retain the above copyright
  7 |  *       notice, this list of conditions and the following disclaimer.
  8 |  *     * Redistributions in binary form must reproduce the above copyright
  9 |  *       notice, this list of conditions and the following disclaimer in the
 10 |  *       documentation and/or other materials provided with the distribution.
 11 |  *     * Neither the name of the NVIDIA CORPORATION nor the
 12 |  *       names of its contributors may be used to endorse or promote products
 13 |  *       derived from this software without specific prior written permission.
 14 |  * 
 15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
 16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
 18 |  * ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
 19 |  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 20 |  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 23 |  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 24 |  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 25 |  *
 26 |  ******************************************************************************/
 27 | 
 28 | /******************************************************************************
 29 |  *
 30 |  * Code and text by Sean Baxter, NVIDIA Research
 31 |  * See http://nvlabs.github.io/moderngpu for repository and documentation.
 32 |  *
 33 |  ******************************************************************************/
 34 | 
 35 | #pragma once
 36 | 
 37 | #include "../mgpuhost.cuh"
 38 | 
 39 | namespace mgpu {
 40 | 
 41 | ////////////////////////////////////////////////////////////////////////////////
 42 | // KernelReduce
 43 | 
 44 | template<typename Tuning, typename InputIt, typename T, typename Op>
 45 | MGPU_LAUNCH_BOUNDS void KernelReduce(InputIt data_global, int count, 
 46 | 	T identity, Op op, T* reduction_global) {
 47 | 
 48 | 	typedef MGPU_LAUNCH_PARAMS Params;
 49 | 	const int NT = Params::NT;
 50 | 	const int VT = Params::VT;
 51 | 	const int NV = NT * VT;
 52 | 	typedef CTAReduce<NT, Op> R;
 53 | 
 54 | 	union Shared {
 55 | 		typename R::Storage reduceStorage;
 56 | 	};
 57 | 	__shared__ Shared shared;
 58 | 
 59 | 	int tid = threadIdx.x;
 60 | 	int block = blockIdx.x;
 61 | 	int gid = NV * block;
 62 | 	int count2 = min(NV, count - gid);
 63 | 
 64 | 	// Load a full tile into register in strided order. Set out-of-range values
 65 | 	// with identity.
 66 | 	T data[VT];
 67 | 	DeviceGlobalToRegDefault<NT, VT>(count2, data_global + gid, tid, data,
 68 | 		identity);
 69 | 
 70 | 	// Sum elements within each thread.
 71 | 	T x;
 72 | 	#pragma unroll
 73 | 	for(int i = 0; i < VT; ++i)
 74 | 		x = i ? op(x, data[i]) : data[i];
 75 | 
 76 | 	// Sum thread-totals over the CTA.
 77 | 	x = R::Reduce(tid, x, shared.reduceStorage, op);
 78 | 
 79 | 	// Store the tile's reduction to global memory.
 80 | 	if(!tid)
 81 | 		reduction_global[block] = x;
 82 | }
 83 | 
 84 | ////////////////////////////////////////////////////////////////////////////////
 85 | // Reduce
 86 | 
 87 | template<typename InputIt, typename T, typename Op>
 88 | MGPU_HOST void Reduce(InputIt data_global, int count, T identity, Op op,
 89 | 	T* reduce_global, T* reduce_host, CudaContext& context) {
 90 | 
 91 | 	MGPU_MEM(T) totalDevice;
 92 | 	if(!reduce_global) {
 93 | 		totalDevice = context.Malloc<T>(1);
 94 | 		reduce_global = totalDevice->get();
 95 | 	}
 96 | 
 97 | 	if(count <= 256) {
 98 | 		typedef LaunchBoxVT<256, 1> Tuning;
 99 | 		KernelReduce<Tuning><<<1, 256, 0, context.Stream()>>>(
100 | 			data_global, count, identity, op, reduce_global);
101 | 		MGPU_SYNC_CHECK("KernelReduce");
102 | 
103 | 	} else if(count <= 768) {
104 | 		typedef LaunchBoxVT<256, 3> Tuning;
105 | 		KernelReduce<Tuning><<<1, 256, 0, context.Stream()>>>(
106 | 			data_global, count, identity, op, reduce_global);
107 | 		MGPU_SYNC_CHECK("KernelReduce");
108 | 
109 | 	} else if(count <= 512 * ((sizeof(T) > 4) ? 4 : 8)) {
110 | 		typedef LaunchBoxVT<512, (sizeof(T) > 4) ? 4 : 8> Tuning;
111 | 		KernelReduce<Tuning><<<1, 512, 0, context.Stream()>>>(
112 | 			data_global, count, identity, op, reduce_global);
113 | 		MGPU_SYNC_CHECK("KernelReduce");
114 | 
115 | 	} else {
116 | 		// Launch a grid and reduce tiles to temporary storage.
117 | 		typedef LaunchBoxVT<256, (sizeof(T) > 4) ? 8 : 16> Tuning;
118 | 		int2 launch = Tuning::GetLaunchParams(context);
119 | 		int NV = launch.x * launch.y;
120 | 		int numBlocks = MGPU_DIV_UP(count, NV);
121 | 
122 | 		MGPU_MEM(T) reduceDevice = context.Malloc<T>(numBlocks);
123 | 		KernelReduce<Tuning><<<numBlocks, launch.x, 0, context.Stream()>>>(
124 | 			data_global, count, identity, op, reduceDevice->get());
125 | 		MGPU_SYNC_CHECK("KernelReduce");
126 | 
127 | 		Reduce(reduceDevice->get(), numBlocks, identity, op, reduce_global,
128 | 			(T*)0, context);
129 | 	}
130 | 
131 | 	if(reduce_host)
132 | 		copyDtoH(reduce_host, reduce_global, 1);
133 | }
134 | 
// Convenience overload: sums count elements of data_global using
// mgpu::plus<T> with (T)0 as the identity, and returns the total by value.
// T is the iterator's value_type.
template<typename InputIt>
MGPU_HOST typename std::iterator_traits<InputIt>::value_type
Reduce(InputIt data_global, int count, CudaContext& context) { 
	typedef typename std::iterator_traits<InputIt>::value_type T;

	T total;
	T* const noDeviceResult = 0;	// let Reduce allocate its own device slot
	Reduce(data_global, count, (T)0, mgpu::plus<T>(), noDeviceResult, &total,
		context);
	return total;
}
143 | 
144 | } // namespace mgpu
145 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/modern_gpu/include/kernels/bulkremove.cuh:
--------------------------------------------------------------------------------
  1 | /******************************************************************************
  2 |  * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
  3 |  * 
  4 |  * Redistribution and use in source and binary forms, with or without
  5 |  * modification, are permitted provided that the following conditions are met:
  6 |  *     * Redistributions of source code must retain the above copyright
  7 |  *       notice, this list of conditions and the following disclaimer.
  8 |  *     * Redistributions in binary form must reproduce the above copyright
  9 |  *       notice, this list of conditions and the following disclaimer in the
 10 |  *       documentation and/or other materials provided with the distribution.
 11 |  *     * Neither the name of the NVIDIA CORPORATION nor the
 12 |  *       names of its contributors may be used to endorse or promote products
 13 |  *       derived from this software without specific prior written permission.
 14 |  * 
 15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
 16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
 18 |  * ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
 19 |  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 20 |  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 23 |  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 24 |  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 25 |  *
 26 |  ******************************************************************************/
 27 | 
 28 | /******************************************************************************
 29 |  *
 30 |  * Code and text by Sean Baxter, NVIDIA Research
 31 |  * See http://nvlabs.github.io/moderngpu for repository and documentation.
 32 |  *
 33 |  ******************************************************************************/
 34 | 
 35 | #pragma once
 36 | 
 37 | #include "../mgpuhost.cuh"
 38 | #include "../kernels/search.cuh"
 39 | 
 40 | namespace mgpu {
 41 | 
 42 | ////////////////////////////////////////////////////////////////////////////////
 43 | // KernelBulkRemove
 44 | // Copy the values that are not matched by an index. This is like the 
 45 | // anti-gather.
 46 | 
 47 | template<typename Tuning, typename InputIt, typename IndicesIt, 
 48 | 	typename OutputIt>
 49 | MGPU_LAUNCH_BOUNDS void KernelBulkRemove(InputIt source_global, int sourceCount, 
 50 | 	IndicesIt indices_global, int indicesCount, const int* p_global,
 51 | 	OutputIt dest_global) {
 52 | 
 53 | 	typedef MGPU_LAUNCH_PARAMS Params;
 54 | 	typedef typename std::iterator_traits<InputIt>::value_type T;
 55 | 	const int NT = Params::NT;
 56 | 	const int VT = Params::VT;
 57 | 	const int NV = NT * VT;
 58 | 
 59 | 	typedef CTAScan<NT> S;
 60 | 	union Shared {
 61 | 		int indices[NV];
 62 | 		typename S::Storage scan;
 63 | 	};
 64 | 	__shared__ Shared shared;
 65 | 
 66 | 	int tid = threadIdx.x;
 67 | 	int block = blockIdx.x;
 68 | 	int gid = block * NV;
 69 | 	sourceCount = min(NV, sourceCount - gid);
 70 | 
 71 | 	// Search for begin and end iterators of interval to load.
 72 | 	int p0 = p_global[block];
 73 | 	int p1 = p_global[block + 1];
 74 | 
 75 | 	// Set the flags to 1. The default is to copy a value.
 76 | 	#pragma unroll
 77 | 	for(int i = 0; i < VT; ++i) {
 78 | 		int index = NT * i + tid;
 79 | 		shared.indices[index] = index < sourceCount;
 80 | 	}
 81 | 	__syncthreads();
 82 | 
 83 | 	// Load the indices into register.
 84 | 	int begin = p0;
 85 | 	int indexCount = p1 - begin;
 86 | 	int indices[VT];
 87 | 	DeviceGlobalToReg<NT, VT>(indexCount, indices_global + begin, tid, indices);
 88 | 
 89 | 	// Set the counter to 0 for each index we've loaded.
 90 | 	#pragma unroll
 91 | 	for(int i = 0; i < VT; ++i)
 92 | 		if(NT * i + tid < indexCount) 
 93 | 			shared.indices[indices[i] - gid] = 0;
 94 | 	__syncthreads();
 95 | 
 96 | 	// Run a raking scan over the flags. We count the set flags - this is the 
 97 | 	// number of elements to load in per thread.
 98 | 	int x = 0;
 99 | 	#pragma unroll
100 | 	for(int i = 0; i < VT; ++i)
101 | 		x += indices[i] = shared.indices[VT * tid + i];
102 | 	__syncthreads();
103 | 
104 | 	// Run a CTA scan and scatter the gather indices to shared memory.
105 | 	int scan = S::Scan(tid, x, shared.scan);
106 | 	#pragma unroll
107 | 	for(int i = 0; i < VT; ++i)
108 | 		if(indices[i]) shared.indices[scan++] = VT * tid + i;
109 | 	__syncthreads();
110 | 
111 | 	// Load the gather indices into register.
112 | 	DeviceSharedToReg<NT, VT>(shared.indices, tid, indices);
113 | 
114 | 	// Gather the data into register. The number of values to copy is 
115 | 	// sourceCount - indexCount.
116 | 	source_global += gid;
117 | 	int count = sourceCount - indexCount;
118 | 	T values[VT];
119 | 	DeviceGather<NT, VT>(count, source_global, indices, tid, values, false);
120 | 
121 | 	// Store all the valid registers to dest_global.
122 | 	DeviceRegToGlobal<NT, VT>(count, values, tid, dest_global + gid - begin);
123 | }
124 | 
////////////////////////////////////////////////////////////////////////////////
// BulkRemove 
// Host entry point. Copies source_global[0, sourceCount) to dest_global while
// skipping the elements whose positions are listed in indices_global.
//
//   source_global  - input sequence (device-accessible iterator).
//   sourceCount    - number of input elements.
//   indices_global - positions to drop; they are fed to a lower-bound
//                    partition search, so they are presumably required to be
//                    sorted ascending (NOTE(review): confirm).
//   indicesCount   - number of positions in indices_global.
//   dest_global    - output sequence.
//   context        - supplies the stream and the temporary allocation.

template<typename InputIt, typename IndicesIt, typename OutputIt>
MGPU_HOST void BulkRemove(InputIt source_global, int sourceCount,
	IndicesIt indices_global, int indicesCount, OutputIt dest_global,
	CudaContext& context) {

	// With no input there is nothing to copy, and numBlocks below would be 0,
	// which is not a valid grid dimension for a kernel launch.
	if(sourceCount <= 0) return;

	const int NT = 128;
	const int VT = 11;
	typedef LaunchBoxVT<NT, VT> Tuning;
	int2 launch = Tuning::GetLaunchParams(context);
	const int NV = launch.x * launch.y;

	// One partition boundary per tile of NV source elements.
	MGPU_MEM(int) partitionsDevice = BinarySearchPartitions<MgpuBoundsLower>(
		sourceCount, indices_global, indicesCount, NV, mgpu::less<int>(), 
		context);

	int numBlocks = MGPU_DIV_UP(sourceCount, NV);
	KernelBulkRemove<Tuning><<<numBlocks, launch.x, 0, context.Stream()>>>(
		source_global, sourceCount, indices_global, indicesCount, 
		partitionsDevice->get(), dest_global);
	MGPU_SYNC_CHECK("KernelBulkRemove");
}
149 | 
150 | } // namespace mgpu
151 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/rasterizer/shared/ras_qm_mask.cu:
--------------------------------------------------------------------------------
  1 | 
  2 | #include "ras_qm_mask.h"
  3 | 
  4 | #include <mochimazui/glpp.h>
  5 | 
  6 | namespace Mochimazui {
  7 | 
  8 | namespace QM_Mask_Sample_Position {
  9 | 
// Tabulated per-sample offsets for the 8-sample "mpvg" pattern.
// mpvg_sample_position() turns entry i into a position by adding 0.5 to the
// x offset and computing 1 - (y offset + 0.5) for y.
float mpvg_8_x[8] =
{
	-0.266471f, 0.353688f, -0.401679f, 0.488846f,
	0.122459f, -0.0344567f, -0.139007f, 0.207413f,
};

// y offsets matching mpvg_8_x entry for entry.
float mpvg_8_y[8] =
{
	0.164718f, 0.0396624f, -0.215021f, 0.429684f,
	0.282964f, -0.0841444f, -0.475235f, -0.328058f,
};

// x offsets of the 32-sample "mpvg" pattern (same convention as the 8-sample
// tables above).
float mpvg_32_x[32] =  {
	0.18936400f, 0.31758200f, 0.00903428f, -0.21124700f,
	-0.36328000f, 0.33291100f, -0.49970400f, -0.43663000f,
	-0.26837500f, 0.37728700f, -0.18975300f, -0.48250600f,
	-0.13179200f, 0.49235500f, 0.42711400f, 0.37090100f,
	-0.31862800f, 0.02879450f, 0.04699840f, -0.16154700f,
	0.18745700f, -0.35758100f, 0.19744000f, 0.21454900f,
	-0.06650600f, 0.12811500f, 0.33646100f, 0.09921190f,
	-0.05305180f, -0.39892000f, -0.06095580f, -0.25435800f,
};

// y offsets matching mpvg_32_x entry for entry.
float mpvg_32_y[32] = {
	-0.34008000f, 0.40063000f, -0.37434000f, -0.08741820f,
	-0.43687600f, -0.04052820f, -0.09869600f, 0.12511500f,
	0.40422500f, 0.11086400f, -0.43411500f, -0.30842600f,
	-0.26574100f, 0.47500100f, 0.26635400f, -0.24554700f,
	-0.24432000f, 0.27314400f, -0.19358200f, 0.27910400f,
	0.13190500f, -0.03301080f, 0.29668700f, -0.15357100f,
	-0.05443540f, 0.47731600f, -0.43140000f, 0.00607759f,
	0.44544500f, 0.30706200f, 0.12207700f, 0.13223200f,
};
 43 | 
 44 | void sort_samples(std::vector<float2> &samples) {
 45 | 	std::sort(samples.begin(), samples.end(), [](const float2 &a, const float2 &b) {
 46 | 		return (a.y < b.y) || (a.y == b.y && a.x < b.x) ? true : false;
 47 | 	});
 48 | }
 49 | 
 50 | std::vector<float2> mpvg_sample_position(int n_samples) {
 51 | 	std::vector<float2> samples;
 52 | 	samples.resize(n_samples);
 53 | 	if (n_samples == 8) {
 54 | 		for (int i = 0; i < 8; ++i) {
 55 | 			samples[i].x = mpvg_8_x[i] + 0.5f;
 56 | 			samples[i].y = 1.0f - (mpvg_8_y[i] + 0.5f);
 57 | 		}
 58 | 	}
 59 | 	else if (n_samples == 32) {
 60 | 		for (int i = 0; i < 32; ++i) {
 61 | 			samples[i].x = mpvg_32_x[i] + 0.5f;
 62 | 			samples[i].y = 1.0f - (mpvg_32_y[i] + 0.5f);
 63 | 		}
 64 | 	}
 65 | 	else {
 66 | 		throw std::runtime_error("mpvg_sample_position: unsupported sample number.");
 67 | 	}
 68 | 	sort_samples(samples);
 69 | 	return samples;
 70 | }
 71 | 
 72 | std::vector<float2> gl_sample_position(int n_samples) {
 73 | 	std::vector<float2> samples;
 74 | 
 75 | 	GLPP::NamedTexture ttex;
 76 | 	ttex.target(GLPP::Texture2DMultisample).create()
 77 | 		.storage2DMultisample(n_samples, GL_RGBA8, 1, 1, GL_TRUE);
 78 | 
 79 | 	GLPP::NamedFramebuffer tfbo;
 80 | 	tfbo.create().texture2D(GL_COLOR_ATTACHMENT0, ttex, 0);
 81 | 
 82 | 	tfbo.bind(GL_FRAMEBUFFER);
 83 | 
 84 | 	int gl_n_samples;
 85 | 	glGetIntegerv(GL_SAMPLES, &gl_n_samples);
 86 | 
 87 | 	if (n_samples != gl_n_samples) {
 88 | 		throw std::runtime_error("initQMMaskTable: incorrect sample number");
 89 | 	}
 90 | 
 91 | 	samples.resize(n_samples);
 92 | 	for (int i = 0; i < n_samples; ++i) {
 93 | 		glGetMultisamplefv(GL_SAMPLE_POSITION, i, (float*)(samples.data() + i));
 94 | 	}
 95 | 
 96 | 	tfbo.bind(GL_FRAMEBUFFER);
 97 | 
 98 | 	tfbo.destroy();
 99 | 	ttex.destroy();
100 | 
101 | 	sort_samples(samples);
102 | 	return samples;
103 | }
104 | 
105 | // -------- -------- -------- -------- -------- -------- -------- --------
106 | // -------- -------- -------- -------- -------- -------- -------- --------
107 | 
// Zero-initialised 8-entry tables. vg_sample_position() does not read them;
// it computes its 8-sample pattern procedurally.
float vg_8_x[8] = {};
float vg_8_y[8] = {};

// x offsets of the 32-sample "vg" pattern. vg_sample_position() adds 0.5 to
// each entry to obtain the sample position.
double vg_32_x[32] = {
	-0.49479166651144624000, 0.02604168653488159200, -0.24479165673255920000, 0.27604168653488159000, -0.36979167163372040000, 0.15104168653488159000, -0.09895834326744079600, 0.38020831346511841000, -0.43229166418313980000, 0.06770831346511840800, -0.18229165673255920000, 0.33854168653488159000, -0.30729167163372040000, 0.21354168653488159000, -0.05729165673255920400, 0.46354168653488159000, -0.46354166790843010000, 0.05729168653488159200, -0.19270834326744080000, 0.30729168653488159000, -0.31770832836627960000, 0.16145831346511841000, -0.06770834326744079600, 0.43229168653488159000, -0.40104166418313980000, 0.11979168653488159000, -0.13020834326744080000, 0.36979168653488159000, -0.27604167163372040000, 0.24479168653488159000, -0.02604165673255920400, 0.47395831346511841000,
};
// y offsets matching vg_32_x entry for entry; the values increase
// monotonically in steps of 1/32.
double vg_32_y[32] = {
	-0.48437500000000000000, -0.45312500000000000000, -0.42187500000000000000, -0.39062500000000000000, -0.35937500000000000000, -0.32812500000000000000, -0.29687500000000000000, -0.26562500000000000000, -0.23437500000000000000, -0.20312500000000000000, -0.17187500000000000000, -0.14062500000000000000, -0.10937500000000000000, -0.07812500000000000000, -0.04687500000000000000, -0.01562500000000000000, 0.01562500000000000000, 0.04687500000000000000, 0.07812500000000000000, 0.10937500000000000000, 0.14062500000000000000, 0.17187500000000000000, 0.20312500000000000000, 0.23437500000000000000, 0.26562500000000000000, 0.29687500000000000000, 0.32812500000000000000, 0.35937500000000000000, 0.39062500000000000000, 0.42187500000000000000, 0.45312500000000000000, 0.48437500000000000000,
};
117 | 
118 | std::vector<float2> vg_sample_position(int i_n_samples) {
119 | 
120 | 	std::vector<float2> fsample_8;
121 | 	std::vector<float2> fsample_32;
122 | 
123 | 	fsample_8.resize(8);
124 | 	fsample_32.resize(32);
125 | 
126 | 	for (int i = 0; i < 32; ++i) {
127 | 		fsample_32[i] = make_float2((float)(vg_32_x[i] + 0.5), (float)(vg_32_y[i] + 0.5));
128 | 	}
129 | 
130 | 	{
131 | 		int2 isample_8[8];
132 | 		double gap = 1.0 / 8;
133 | 		double hgap = gap / 2;
134 | 
135 | 		for (int i = 0; i < 8; ++i) {
136 | 
137 | 			int bc = 0;
138 | 			int x = i;
139 | 			int y = 0;
140 | 
141 | 			while (x) {
142 | 				y <<= 1;
143 | 				y |= x & 1;
144 | 				x >>= 1;
145 | 				++bc;
146 | 			}
147 | 
148 | 			int scale = 1 << bc;
149 | 			double f = y / (float)scale;
150 | 
151 | 			int yy = (int)(f * 8);
152 | 
153 | 			auto &s = isample_8[i];
154 | 			s.x = yy;
155 | 			s.y = i;
156 | 
157 | 			auto &sp = fsample_8[i];
158 | 			sp.x = (float)(s.x * gap + hgap);
159 | 			sp.y = (float)(s.y * gap + hgap);
160 | 		}
161 | 
162 | 		sort_samples(fsample_8);
163 | 	}
164 | 
165 | 
166 | 	if (i_n_samples == 8) {
167 | 		return fsample_8;
168 | 	}
169 | 	else if(i_n_samples == 32) {
170 | 		return fsample_32;
171 | 	}
172 | 	else {
173 | 		throw std::runtime_error("vg_sample_position: only support 8x & 32x samples.");
174 | 	}
175 | }
176 | 
177 | } // end of namespace QM_Mask_Sample_Position
178 | 
179 | } // end of namespace Mochimazui
180 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/modern_gpu/include/util/static.h:
--------------------------------------------------------------------------------
  1 | /******************************************************************************
  2 |  * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
  3 |  * 
  4 |  * Redistribution and use in source and binary forms, with or without
  5 |  * modification, are permitted provided that the following conditions are met:
  6 |  *     * Redistributions of source code must retain the above copyright
  7 |  *       notice, this list of conditions and the following disclaimer.
  8 |  *     * Redistributions in binary form must reproduce the above copyright
  9 |  *       notice, this list of conditions and the following disclaimer in the
 10 |  *       documentation and/or other materials provided with the distribution.
 11 |  *     * Neither the name of the NVIDIA CORPORATION nor the
 12 |  *       names of its contributors may be used to endorse or promote products
 13 |  *       derived from this software without specific prior written permission.
 14 |  * 
 15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
 16 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
 18 |  * ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
 19 |  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 20 |  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 21 |  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 22 |  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 23 |  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 24 |  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 25 |  *
 26 |  ******************************************************************************/
 27 | 
 28 | /******************************************************************************
 29 |  *
 30 |  * Code and text by Sean Baxter, NVIDIA Research
 31 |  * See http://nvlabs.github.io/moderngpu for repository and documentation.
 32 |  *
 33 |  ******************************************************************************/
 34 | 
 35 | #pragma once
 36 | 
 37 | #include <functional>
 38 | #include <iterator>
 39 | #include <cfloat>
 40 | #include <typeinfo>
 41 | #include <vector>
 42 | #include <list>
 43 | #include <map>
 44 | #include <algorithm>
 45 | #include <cassert>
 46 | #include <memory>
 47 | #include <cmath>
 48 | #include <cstdio>
 49 | #include <cstdlib>
 50 | 
 51 | #ifndef MGPU_MIN
 52 | #define MGPU_MIN(x, y) (((x) <= (y)) ? (x) : (y))
 53 | #define MGPU_MAX(x, y) (((x) >= (y)) ? (x) : (y))
 54 | #define MGPU_MAX0(x) (((x) >= 0) ? (x) : 0)
 55 | #define MGPU_ABS(x) (((x) >= 0) ? (x) : (-x))
 56 | 
 57 | #define MGPU_DIV_UP(x, y) (((x) + (y) - 1) / (y))
 58 | #define MGPU_DIV_ROUND(x, y) (((x) + (y) / 2) / (y))
 59 | #define MGPU_ROUND_UP(x, y) ((y) * MGPU_DIV_UP(x, y))
 60 | #define MGPU_SHIFT_DIV_UP(x, y) (((x) + ((1<< (y)) - 1))>> y)
 61 | #define MGPU_ROUND_UP_POW2(x, y) (((x) + (y) - 1) & ~((y) - 1))
 62 | #define MGPU_ROUND_DOWN_POW2(x, y) ((x) & ~((y) - 1))
 63 | #define MGPU_IS_POW_2(x) (0 == ((x) & ((x) - 1)))
 64 | 
 65 | #endif // MGPU_MIN
 66 | 
 67 | namespace mgpu {
 68 | 
 69 | 
// Short aliases for the built-in integer types.
// NOTE(review): the 16/64 suffixes presumably assume a 16-bit short and a
// 64-bit long long; the actual widths are platform-defined.
typedef unsigned char byte;

typedef unsigned int uint;
typedef signed short int16;

typedef unsigned short ushort;
typedef unsigned short uint16;

typedef long long int64;
typedef unsigned long long uint64;
 80 | 
 81 | // IsPow2<X>::value is true if X is a power of 2.
 82 | template<int X> struct sIsPow2 {
 83 | 	enum { value = 0 == (X & (X - 1)) };
 84 | };
 85 | 
 86 | // Finds the base-2 logarithm of X. value is -1 if X is not a power of 2.
 87 | template<int X, bool roundUp = true> struct sLogPow2 { 
 88 | 	enum { extra = sIsPow2<X>::value ? 0 : (roundUp ? 1 : 0) };
 89 | 	enum { inner = sLogPow2<X / 2>::inner + 1 };
 90 | 	enum { value = inner + extra };
 91 | };
 92 | template<bool roundUp> struct sLogPow2<0, roundUp> {
 93 | 	enum { inner = 0 };
 94 | 	enum { value = 0 };
 95 | };
 96 | template<bool roundUp> struct sLogPow2<1, roundUp> { 
 97 | 	enum { inner = 0 };
 98 | 	enum { value = 0 };
 99 | };
100 | 
// sDivUp<X, Y>::value is X / Y rounded up (for positive operands).
template<int X, int Y>
struct sDivUp {
	enum { value = (X + Y - 1) / Y };
};

// sDiv2RoundUp<count, levels>::value halves `count`, rounding up each time,
// `levels` times in a row. With levels == 0 the count is returned unchanged.
template<int count, int levels> struct sDiv2RoundUp {
	enum { halved = sDivUp<count, 2>::value };
	enum { value = sDiv2RoundUp<halved, levels - 1>::value };
};
template<int count> struct sDiv2RoundUp<count, 0> {
	enum { value = count };
};
112 | 
// sDivSafe<X, Y>::value is X / Y, except that a zero divisor yields 0
// instead of an ill-formed constant expression.
template<int X, int Y>
struct sDivSafe {
	enum { value = X / Y };
};
// Zero divisor: report 0.
template<int X>
struct sDivSafe<X, 0> {
	enum { value = 0 };
};
121 | 
// sRoundUp<X, Y>::value is X raised to the next multiple of Y (X itself when
// it already is a multiple). `rem` exposes X % Y.
template<int X, int Y>
struct sRoundUp {
	enum { rem = X % Y };
	enum { value = rem ? (X + (Y - rem)) : X };
};

// sRoundDown<X, Y>::value is X with the remainder X % Y removed. `rem`
// exposes X % Y.
template<int X, int Y>
struct sRoundDown {
	enum { rem = X % Y };
	enum { value = X - rem };
};
133 | 
// sIntegerDiv guards against division by zero during template evaluation.
// Both arms of a ?: expression are checked by the compiler, so an illegal
// expression in either arm is an error even when the ternary is explicitly
// written to avoid it. Here a zero Y is replaced by the divisor X + 1, so
// the result is X / (X + 1) in that case (0 for non-negative X).
template<int X, int Y>
struct sIntegerDiv {
	enum { value = X / ((Y != 0) ? Y : (X + 1)) };
};
142 | 
// sMax<X, Y>::value is the larger of the two integers.
template<int X, int Y>
struct sMax {
	enum { value = (Y > X) ? Y : X };
};
// sMin<X, Y>::value is the smaller of the two integers.
template<int X, int Y>
struct sMin {
	enum { value = (Y < X) ? Y : X };
};
151 | 
// sAbs<X>::value is the absolute value of X.
template<int X>
struct sAbs {
	enum { value = (X < 0) ? -X : X };
};
156 | 
157 | 
// sNumFactorsOf2<X>::value is the exponent of 2 in the prime factorization of
// X, i.e. the number of trailing zero bits. The second parameter carries the
// lowest bit of X and selects the terminating specialization once it is 1.
// X must be non-zero: for X == 0 the shifted value stays 0 and the recursion
// never reaches the terminating case.
template<int X, int LSB = X & 1> struct sNumFactorsOf2 {
	enum { shifted = X >> 1 };
	enum { value = sNumFactorsOf2<shifted>::value + 1 };
};
// Lowest bit set: no further factor of 2.
template<int X> struct sNumFactorsOf2<X, 1> {
	enum { value = 0 };
};
166 | 
167 | // Returns the divisor for a conflict-free transpose.
168 | template<int X, int NumBanks = 32> struct sBankConflictDivisor {
169 | 	enum { value = 
170 | 		(1 & X) ? 0 : 
171 | 		(sIsPow2<X>::value ? NumBanks :
172 | 		(1<< sNumFactorsOf2<X>::value)) }; 
173 | 	enum { log_value = sLogPow2<value>::value };
174 | };
175 | 
176 | template<int NT, int X, int NumBanks = 32> struct sConflictFreeStorage {
177 | 	enum { count = NT * X };
178 | 	enum { divisor = sBankConflictDivisor<X, NumBanks>::value };
179 | 	enum { padding = sDivSafe<count, divisor>::value };
180 | 	enum { value = count + padding };
181 | };
182 | 
183 | } // namespace mgpu
184 | 


--------------------------------------------------------------------------------
/gpu-scanline/src/mochimazui/cuda_array.h:
--------------------------------------------------------------------------------
  1 | 
  2 | #pragma once
  3 | 
  4 | #ifndef _MOCHIMAZUI_CURA_ARRAY_H_
  5 | #define _MOCHIMAZUI_CUDA_ARRAY_H_
  6 | 
  7 | #include <cstdio>
  8 | #include <cassert>
  9 | 
 10 | #include <vector>
 11 | #include <stdexcept>
 12 | #include <algorithm>
 13 | 
 14 | #include <cuda.h>
 15 | #include <cuda_runtime.h>
 16 | 
// CTL_ASSERT(x): wraps a CUDA call.
// With _DEBUG defined, the call's result is asserted to equal CUDA_SUCCESS.
// Otherwise the expression is simply evaluated and its result is discarded,
// so failures go unnoticed in that configuration.
// NOTE(review): if NDEBUG were defined together with _DEBUG, assert() would
// expand to nothing and the wrapped call itself would not be executed.
#ifdef _DEBUG
#define CTL_ASSERT(x) assert((x)==CUDA_SUCCESS)
#else
#define CTL_ASSERT(x) x
#endif
 22 | 
 23 | namespace CUDATL {
 24 | 
 25 | 	/*
 26 | 	enum ManagedCUDAArraySyncDirection {
 27 | 	HostToDevice,
 28 | 	DeviceToHost
 29 | 	};
 30 | 	*/
 31 | 
	// Allocation policy that forwards to cudaMalloc / cudaFree.
	template<class T>
	struct cuda_array_allocator {
		// Allocates `bytes` bytes and stores the address in *out.
		static cudaError_t malloc(T **out, size_t bytes) {
			const cudaError_t status = cudaMalloc(out, bytes);
			return status;
		}
		// Releases a pointer obtained from malloc().
		static cudaError_t free(T *ptr) {
			const cudaError_t status = cudaFree(ptr);
			return status;
		}
	};
 41 | 
	// Allocation policy that forwards to cudaMallocManaged / cudaFree.
	template<class T>
	struct cuda_array_managed_allocator {
		// Allocates `bytes` bytes and stores the address in *out.
		static cudaError malloc(T **out, size_t bytes) {
			const cudaError status = cudaMallocManaged(out, bytes);
			return status;
		}
		// Releases a pointer obtained from malloc().
		static cudaError_t free(T *ptr) {
			const cudaError_t status = cudaFree(ptr);
			return status;
		}
	};
 51 | 
	// Allocation policy that forwards to cudaMallocHost / cudaFreeHost.
	template<class T>
	struct cuda_array_host_allocator {
		// Allocates `bytes` bytes and stores the address in *out.
		static cudaError malloc(T **out, size_t bytes) {
			const cudaError status = cudaMallocHost(out, bytes);
			return status;
		}
		// Releases a pointer obtained from malloc().
		static cudaError_t free(T *ptr) {
			const cudaError_t status = cudaFreeHost(ptr);
			return status;
		}
	};
 61 | 
	// -------- -------- -------- -------- -------- -------- -------- --------
	// @class cuda_array
	// Owning wrapper around a single linear buffer of T obtained through the
	// `allocator` policy (which must provide static malloc(T**, size_t) and
	// free(T*)). The buffer is released through the same policy in clear()
	// and in the destructor.
	//
	// _size is the logical element count, _reservedSize the allocated element
	// capacity. Transfers use cudaMemcpy / cudaMemcpyAsync; the Async variants
	// pass no stream argument.
	//
	// NOTE(review): no copy constructor / copy assignment is declared, so the
	// implicitly generated ones would make two objects free the same pointer.
	// Do not copy instances.
	template < class T, class allocator = cuda_array_allocator<T> >
	class cuda_array {

	public:
		cuda_array()
			:_size(0), _reservedSize(0), _gpuPointer(nullptr) {
		}

		~cuda_array() {
			clear();
		}

	public:
		// Logical number of elements.
		size_t size() const { return _size; }
		// Number of elements the current allocation can hold.
		size_t reserved() const { return _reservedSize; }

		// Drops any existing buffer and allocates exactly `size` elements.
		// Previous contents are lost.
		void malloc(const size_t size) {
			release();
			CTL_ASSERT(allocator::malloc(&_gpuPointer, size * sizeof(T)));
			_size = size;
			_reservedSize = size;
		}

		// Sets the logical size to newSize. If the capacity has to grow, a new
		// buffer of exactly newSize elements is allocated and the first
		// size() elements are copied over.
		// Throws std::runtime_error if the allocation fails; the array is left
		// unchanged in that case.
		void resize(const size_t newSize) {
			if (newSize <= _reservedSize) { _size = newSize; return; }
			T *grown = nullptr;
			const cudaError_t status = allocator::malloc(&grown, newSize * sizeof(T));
			if (status != cudaSuccess || !grown) {
				throw std::runtime_error("cuda_array::resize: out of memory");
			}
			if (_gpuPointer) {
				if (_size) {
					// cudaMemcpyDefault lets the runtime infer the direction,
					// so this works for whichever allocator policy is used.
					CTL_ASSERT(cudaMemcpy(grown, _gpuPointer, _size * sizeof(T), cudaMemcpyDefault));
				}
				allocator::free(_gpuPointer);
			}
			_gpuPointer = grown;
			_size = newSize;
			_reservedSize = newSize;
		}

		// Sets the logical size to newSize. If the capacity has to grow, the
		// old buffer is discarded (contents lost) and a buffer with 1.5x the
		// requested element count is allocated.
		// Throws std::runtime_error if the allocation fails; the array is then
		// empty.
		void resizeWithoutCopy(size_t newSize) {
			if (newSize <= _reservedSize) { _size = newSize; return; }
			release();
			const size_t capacity = (size_t)(newSize*1.5);
			const cudaError_t status = allocator::malloc(&_gpuPointer, capacity * sizeof(T));
			if (status != cudaSuccess || !_gpuPointer) {
				_gpuPointer = nullptr;
				throw std::runtime_error("cuda_array::resizWithoutCopy: out of memory");
			}
			_size = newSize;
			_reservedSize = capacity;
		}

		// Releases the buffer and resets size and capacity to zero.
		void clear() {
			release();
		}

		// cpu -> gpu
		void set(const std::vector<T> &v) {
			set(v.data(), v.size());
		}

		// Copies `size` elements from `data` to the start of the buffer,
		// growing (without preserving contents) if size exceeds size().
		void set(const T* data, size_t size) {
			if (size == 0) { return; }
			if (size > _size) { resizeWithoutCopy(size); }
			CTL_ASSERT(cudaMemcpy(_gpuPointer, data, size*sizeof(T), cudaMemcpyHostToDevice));
		}

		void setAsync(const std::vector<T> &v) {
			setAsync(v.data(), v.size());
		}

		// Same as set(), but issues the copy with cudaMemcpyAsync.
		void setAsync(const T* data, size_t size) {
			if (size == 0) { return; }
			if (size > _size) { resizeWithoutCopy(size); }
			CTL_ASSERT(cudaMemcpyAsync(_gpuPointer, data, size*sizeof(T), cudaMemcpyHostToDevice));
		}

		// gpu -> cpu
		// Resizes v to size() and fills it from the buffer.
		void get(std::vector<T> &v) {
			v.resize(_size);
			get(v.data(), v.size());
		}

		// Copies min(size, size()) elements from the buffer into `data`.
		void get(T* data, size_t size) {
			if (size > _size) { size = _size; }
			CTL_ASSERT(cudaMemcpy(data, _gpuPointer, size*sizeof(T), cudaMemcpyDeviceToHost));
		}

		void getAsync(std::vector<T> &v) {
			v.resize(_size);
			getAsync(v.data(), v.size());
		}

		// Same as get(), but issues the copy with cudaMemcpyAsync.
		void getAsync(T* data, size_t size) {
			if (size > _size) { size = _size; }
			CTL_ASSERT(cudaMemcpyAsync(data, _gpuPointer, size*sizeof(T), cudaMemcpyDeviceToHost));
		}

		// cpu -> gpu
		// Writes one element at `pos`, growing the array (contents preserved)
		// when pos is past the end. Throws std::runtime_error for negative pos.
		void setValue(int pos, const T &value) {
			if (pos < 0) { throw std::runtime_error("index out of range"); }
			if ((size_t)pos >= _size) { resize((size_t)pos + 1); }
			CTL_ASSERT(cudaMemcpy(_gpuPointer + pos, &value, sizeof(T), cudaMemcpyHostToDevice));
		}

		// Same as setValue(), but issues the copy with cudaMemcpyAsync.
		void setValueAsync(int pos, const T &value) {
			if (pos < 0) { throw std::runtime_error("index out of range"); }
			if ((size_t)pos >= _size) { resize((size_t)pos + 1); }
			CTL_ASSERT(cudaMemcpyAsync(_gpuPointer + pos, &value, sizeof(T), cudaMemcpyHostToDevice));
		}

		// gpu -> cpu
		// Reads one element; throws std::runtime_error when pos >= size().
		T getValue(size_t pos) {
			T value;
			if (pos >= _size) { throw std::runtime_error("index out of range"); }
			CTL_ASSERT(cudaMemcpy(&value, _gpuPointer + pos, sizeof(T), cudaMemcpyDeviceToHost));
			return value;
		}

		// Raw buffer pointer; the c* accessors always return nullptr.
		T* gpointer() const { return _gpuPointer; }
		T* cpointer() const { return nullptr; }

		T* gptr() const { return _gpuPointer; }
		T* cptr() const { return nullptr; }

	public:
		operator T* () { return _gpuPointer; }

	private:
		// Frees the buffer through the allocator policy and resets all
		// bookkeeping, so size and capacity never describe freed memory.
		void release() {
			if (_gpuPointer) {
				allocator::free(_gpuPointer);
			}
			_gpuPointer = nullptr;
			_size = 0;
			_reservedSize = 0;
		}

	private:
		size_t _size;
		size_t _reservedSize;
		T *_gpuPointer;
	};
200 | 
201 | 	// !!! If device support cudaMallocManaged, just use cuda_array with cuda_array_managed_allocator
202 | 	//template < class T, class  allocator = cuda_array_allocator<T>>
203 | 	//class cuda_array_managed : public cuda_array<T> {
204 | 	//};
205 | 
206 | 	template<class T>
207 | 	using cuda_device_array = cuda_array < T, cuda_array_allocator<T> >;
208 | 
209 | 	template<class T>
210 | 	using cuda_managed_array = cuda_array < T, cuda_array_managed_allocator<T> >;
211 | 
212 | 	template<class T>
213 | 	using cuda_host_array = cuda_array < T, cuda_array_host_allocator<T> >;
214 | 
215 | #ifdef _DEBUG
216 | 	template<class T>
217 | 	using CUDAArray = cuda_managed_array < T >;
218 | 
219 | 	template<class T>
220 | 	using CUDAHostArray = cuda_host_array < T >;
221 | 
222 | 	template<class T>
223 | 	using CUDAManagedArray = cuda_managed_array < T >;
224 | #else 
225 | 	template<class T>
226 | 	using CUDAArray = cuda_array < T >;
227 | 
228 | 	template<class T>
229 | 	using CUDAHostArray = cuda_host_array < T >;
230 | 
231 | 	template<class T>
232 | 	using CUDAManagedArray = cuda_managed_array < T >;
233 | #endif
234 | 
235 | }
236 | 
237 | #endif
238 | 


--------------------------------------------------------------------------------