├── README.md
├── cpp-native
    ├── CMakeLists.txt
    ├── setup.py
    ├── std-wrapper.cpp
    ├── std.cpp
    └── std.h
├── cython-class
    ├── setup.py
    ├── std.cpp
    ├── std.h
    └── stdcy.pyx
├── cython-thread
    ├── CMakeLists.txt
    ├── setup.py
    ├── std.cpp
    ├── std.h
    └── stdcy.pyx
├── cython
    ├── setup.py
    ├── std.cpp
    ├── std.h
    └── stdcy.pyx
├── pybind
    ├── CMakeLists.txt
    ├── setup.py
    ├── std.cpp
    ├── std.h
    └── stdpy.cpp
├── test-perf.py
├── test-thread.py
└── test-val.py


/README.md:
--------------------------------------------------------------------------------
1 | # PythonCExtensions
2 | Using a Python C extension and comparing its performance to NumPy and Python
3 |
4 | You need to build the extensions before running the tests. To build and install an extension, run the following inside its directory (`cpp-native`, `cython`, `cython-class`, `cython-thread` or `pybind`):
5 |
6 | ```
7 | $ python3 setup.py install
8 | ```
9 |
10 | `test-perf.py` has a main block which compares Python, NumPy and the C/C++ extensions for small arrays and plots the performance with matplotlib.
11 |
12 | *I've modified the wrapper to work with both Python 2 and 3.*
13 |
14 | ## Results
15 |
16 |
17 |
18 |
19 |
20 |
21 | ## Dependencies
22 | ### Cython
23 | ```
24 | $ pip3 install Cython
25 | ```
26 | ### Pybind11
27 | ```
28 | $ brew install pybind11
29 | ```
30 |
31 | ## References
32 | - [Speeding up Python and NumPy: C++ing the Way](https://medium.com/coding-with-clarity/speeding-up-python-and-numpy-c-ing-the-way-3b9658ed78f4)
33 | - [NumPy와 C++ Extensions의 성능 비교](http://docs.likejazz.com/python-numpy-extensions/)
34 |

--------------------------------------------------------------------------------
/cpp-native/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.7)
2 | project(std)
3 |
4 | set(CMAKE_CXX_STANDARD 11)
5 |
6 | set(SOURCE_FILES
7 |         std-wrapper.cpp
8 |         std.cpp
9 |         std.h)
10 |
11 | include_directories(${PYTHON_INCLUDE_DIRS})
12 |
13 | add_executable(std ${SOURCE_FILES})
14 |
-------------------------------------------------------------------------------- /cpp-native/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup, Extension 2 | 3 | ext = Extension('std', 4 | sources=[ 5 | 'std.cpp', 6 | 'std-wrapper.cpp', 7 | ], 8 | ) 9 | 10 | setup(name='std_performance', 11 | version='1.0', 12 | description='Module for calculating standard deviation.', 13 | ext_modules=[ext]) 14 | -------------------------------------------------------------------------------- /cpp-native/std-wrapper.cpp: -------------------------------------------------------------------------------- 1 | #include "Python.h" 2 | #include "std.h" 3 | 4 | struct module_state { 5 | PyObject *error; 6 | }; 7 | 8 | #if PY_MAJOR_VERSION >= 3 9 | #define GETSTATE(m) ((struct module_state*)PyModule_GetState(m)) 10 | #else 11 | #define GETSTATE(m) (&_state) 12 | static struct module_state _state; 13 | 14 | extern "C" { 15 | void initstd(void); 16 | } 17 | #endif 18 | 19 | static PyObject *error_out(PyObject * m) { 20 | struct module_state *st = GETSTATE(m); 21 | PyErr_SetString(st->error, "something bad happened"); 22 | return NULL; 23 | } 24 | 25 | static PyObject *std_standard_dev(PyObject * self, PyObject * args) { 26 | PyObject * input; 27 | PyArg_ParseTuple(args, "O", &input); 28 | 29 | int size = PyList_Size(input); 30 | 31 | std::vector list; 32 | list.resize(size); 33 | 34 | for (int i = 0; i < size; i++) { 35 | list[i] = PyFloat_AS_DOUBLE(PyList_GET_ITEM(input, i)); 36 | } 37 | 38 | return PyFloat_FromDouble(standardDeviation(list)); 39 | } 40 | 41 | static PyMethodDef std_methods[] = { 42 | {"error_out", (PyCFunction) error_out, METH_NOARGS, NULL}, 43 | {"standard_dev", std_standard_dev, METH_VARARGS, "Return the standard deviation of a list."}, 44 | {NULL, NULL} 45 | }; 46 | 47 | #if PY_MAJOR_VERSION >= 3 48 | 49 | static int std_traverse(PyObject * m, visitproc visit, void *arg) { 50 | 
Py_VISIT(GETSTATE(m)->error); 51 | return 0; 52 | } 53 | 54 | static int std_clear(PyObject * m) { 55 | Py_CLEAR(GETSTATE(m)->error); 56 | return 0; 57 | } 58 | 59 | 60 | static struct PyModuleDef moduledef = { 61 | PyModuleDef_HEAD_INIT, 62 | "std", 63 | NULL, 64 | sizeof(struct module_state), 65 | std_methods, 66 | NULL, 67 | std_traverse, 68 | std_clear, 69 | NULL 70 | }; 71 | 72 | #define INITERROR return NULL 73 | 74 | PyMODINIT_FUNC PyInit_std(void) 75 | 76 | #else 77 | #define INITERROR return 78 | 79 | void initstd(void) 80 | #endif 81 | { 82 | #if PY_MAJOR_VERSION >= 3 83 | PyObject * module = PyModule_Create(&moduledef); 84 | 85 | if (module == NULL) 86 | INITERROR; 87 | struct module_state *st = GETSTATE(module); 88 | 89 | st->error = PyErr_NewException("std.Error", NULL, NULL); 90 | if (st->error == NULL) { 91 | Py_DECREF(module); 92 | INITERROR; 93 | } 94 | #else 95 | Py_InitModule("std", std_methods); 96 | #endif 97 | 98 | #if PY_MAJOR_VERSION >= 3 99 | return module; 100 | #endif 101 | } 102 | -------------------------------------------------------------------------------- /cpp-native/std.cpp: -------------------------------------------------------------------------------- 1 | #include "std.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | double standardDeviation(std::vector v) { 9 | double sum = std::accumulate(v.begin(), v.end(), 0.0); 10 | double mean = sum / v.size(); 11 | 12 | double squareSum = std::inner_product(v.begin(), v.end(), v.begin(), 0.0); 13 | return sqrt(squareSum / v.size() - mean * mean); 14 | } 15 | -------------------------------------------------------------------------------- /cpp-native/std.h: -------------------------------------------------------------------------------- 1 | #ifndef INIT_STD_H 2 | #define INIT_STD_H 3 | 4 | #include 5 | 6 | double standardDeviation(std::vector); 7 | 8 | #endif 9 | -------------------------------------------------------------------------------- /cython-class/setup.py: 
-------------------------------------------------------------------------------- 1 | from distutils.core import setup, Extension 2 | from Cython.Build import cythonize 3 | 4 | ext = Extension('stdcyc', 5 | sources=[ 6 | 'stdcy.pyx', 7 | 'std.cpp', 8 | ], 9 | language='c++', 10 | ) 11 | 12 | setup(name='std_performance_cyc', 13 | version='1.0', 14 | description='Module for calculating standard deviation.', 15 | ext_modules=cythonize(ext)) 16 | -------------------------------------------------------------------------------- /cython-class/std.cpp: -------------------------------------------------------------------------------- 1 | #include "std.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | stddev::stddev(std::vector v) { 9 | vs = v; 10 | } 11 | 12 | double stddev::standardDeviation() { 13 | std::vector v = vs; 14 | 15 | double sum = std::accumulate(v.begin(), v.end(), 0.0); 16 | double mean = sum / v.size(); 17 | 18 | double squareSum = std::inner_product(v.begin(), v.end(), v.begin(), 0.0); 19 | return sqrt(squareSum / v.size() - mean * mean); 20 | } 21 | -------------------------------------------------------------------------------- /cython-class/std.h: -------------------------------------------------------------------------------- 1 | #ifndef INIT_STD_H 2 | #define INIT_STD_H 3 | 4 | #include 5 | 6 | class stddev { 7 | private: 8 | std::vector vs; 9 | public: 10 | stddev(std::vector v); 11 | double standardDeviation(); 12 | }; 13 | 14 | #endif 15 | -------------------------------------------------------------------------------- /cython-class/stdcy.pyx: -------------------------------------------------------------------------------- 1 | from libcpp.vector cimport vector 2 | 3 | cdef extern from "std.h": 4 | cdef cppclass stddev: 5 | stddev(vector[double]) except + 6 | vector[double] vs 7 | double standardDeviation() 8 | 9 | 10 | cdef class pystd: 11 | cdef stddev *thisptr; 12 | 13 | def __init__(self, vector[double] v): 14 | self.thisptr = new 
stddev(v) 15 | 16 | def __del__(self): 17 | del self.thisptr 18 | 19 | def standard_dev(self): 20 | return self.thisptr.standardDeviation() 21 | -------------------------------------------------------------------------------- /cython-thread/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.7) 2 | project(stdcyt) 3 | 4 | set(CMAKE_CXX_STANDARD 11) 5 | 6 | set(SOURCE_FILES 7 | std.h 8 | std.cpp 9 | ) 10 | 11 | include_directories(${PYTHON_INCLUDE_DIRS}) 12 | 13 | add_executable(stdcyt ${SOURCE_FILES}) 14 | -------------------------------------------------------------------------------- /cython-thread/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup, Extension 2 | from Cython.Build import cythonize 3 | 4 | ext = Extension('stdcyt', 5 | sources=[ 6 | 'stdcy.pyx', 7 | 'std.cpp', 8 | ], 9 | language='c++', 10 | extra_compile_args=['-std=c++11'], 11 | ) 12 | 13 | setup(name='std_performance_cyt', 14 | version='1.0', 15 | description='Module for calculating standard deviation.', 16 | ext_modules=cythonize(ext)) 17 | -------------------------------------------------------------------------------- /cython-thread/std.cpp: -------------------------------------------------------------------------------- 1 | #include "std.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | const int NUM_THREADS = 8; 13 | 14 | stddev::stddev(std::vector v) { 15 | vs = v; 16 | } 17 | 18 | void stddev::calc(const std::vector *v, int start, int end, double &sum, double &squaredSum) { 19 | double th_sum = 0; 20 | double th_squaredSum = 0; 21 | 22 | for (int i = start; i < end; i++) { 23 | double it = (*v).at(i); 24 | 25 | th_sum += it; 26 | th_squaredSum += it * it; 27 | } 28 | 29 | sum += th_sum; 30 | squaredSum += th_squaredSum; 31 | } 32 | 33 | double 
stddev::standardDeviation() { 34 | auto m1_1 = std::chrono::steady_clock::now(); 35 | 36 | // If you use a pointer, you can improve performance. 37 | std::vector v = vs; 38 | 39 | auto m1_2 = std::chrono::duration_cast 40 | (std::chrono::steady_clock::now() - m1_1); 41 | std::cout << "[C++] std::vector loading: " << m1_2.count() << " ms" << std::endl; 42 | 43 | std::vector ths; 44 | 45 | double sum = 0; 46 | double squaredSum = 0; 47 | 48 | std::cout << "[C++] NUM_THREADS: " << NUM_THREADS << std::endl; 49 | 50 | auto m2_1 = std::chrono::steady_clock::now(); 51 | 52 | // Start threads. 53 | for (int i = 0; i < NUM_THREADS; ++i) { 54 | int start = i * round(v.size() / NUM_THREADS); 55 | int end = (i == NUM_THREADS - 1) ? v.size() : (i + 1) * round(v.size() / NUM_THREADS); 56 | 57 | ths.push_back(std::thread(&stddev::calc, this, &v, start, end, std::ref(sum), std::ref(squaredSum))); 58 | } 59 | 60 | // Waiting for threads to finish. 61 | for (auto &t : ths) { 62 | t.join(); 63 | } 64 | auto m2_2 = std::chrono::duration_cast 65 | (std::chrono::steady_clock::now() - m2_1); 66 | std::cout << "[C++] thread execution elapsed: " << m2_2.count() << " ms" << std::endl; 67 | 68 | double mean = sum / v.size(); 69 | return sqrt(squaredSum / v.size() - mean * mean); 70 | } 71 | -------------------------------------------------------------------------------- /cython-thread/std.h: -------------------------------------------------------------------------------- 1 | #ifndef INIT_STD_H 2 | #define INIT_STD_H 3 | 4 | #include 5 | 6 | class stddev { 7 | private: 8 | std::vector vs; 9 | public: 10 | stddev(std::vector); 11 | 12 | void calc(const std::vector *, int, int, double &, double &); 13 | 14 | double standardDeviation(); 15 | }; 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /cython-thread/stdcy.pyx: -------------------------------------------------------------------------------- 1 | from libcpp.vector cimport vector 2 | 3 | 
cdef extern from "std.h": 4 | cdef cppclass stddev: 5 | stddev(vector[double]) except + 6 | vector[double] vs 7 | double standardDeviation() 8 | 9 | 10 | cdef class pystd: 11 | cdef stddev *thisptr; 12 | 13 | def __init__(self, vector[double] v): 14 | self.thisptr = new stddev(v) 15 | 16 | def __del__(self): 17 | del self.thisptr 18 | 19 | def standard_dev(self): 20 | return self.thisptr.standardDeviation() 21 | -------------------------------------------------------------------------------- /cython/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup, Extension 2 | from Cython.Build import cythonize 3 | 4 | ext = Extension('stdcy', 5 | sources=[ 6 | 'stdcy.pyx', 7 | 'std.cpp', 8 | ], 9 | language='c++', 10 | ) 11 | 12 | setup(name='std_performance_cy', 13 | version='1.0', 14 | description='Module for calculating standard deviation.', 15 | ext_modules=cythonize(ext)) 16 | -------------------------------------------------------------------------------- /cython/std.cpp: -------------------------------------------------------------------------------- 1 | #include "std.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | double standardDeviation(std::vector v) { 9 | double sum = std::accumulate(v.begin(), v.end(), 0.0); 10 | double mean = sum / v.size(); 11 | 12 | double squareSum = std::inner_product(v.begin(), v.end(), v.begin(), 0.0); 13 | return sqrt(squareSum / v.size() - mean * mean); 14 | } 15 | -------------------------------------------------------------------------------- /cython/std.h: -------------------------------------------------------------------------------- 1 | #ifndef INIT_STD_H 2 | #define INIT_STD_H 3 | 4 | #include 5 | 6 | double standardDeviation(std::vector); 7 | 8 | #endif 9 | -------------------------------------------------------------------------------- /cython/stdcy.pyx: -------------------------------------------------------------------------------- 1 
| from libcpp.vector cimport vector 2 | 3 | cdef extern from "std.h": 4 | double standardDeviation(vector[double]) 5 | 6 | def standard_dev(lst): 7 | # This pre-conversion has some performance improvements. 8 | cdef vector[double] v = lst 9 | 10 | return standardDeviation(v) 11 | -------------------------------------------------------------------------------- /pybind/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.7) 2 | project(stdpy) 3 | 4 | set(CMAKE_CXX_STANDARD 11) 5 | 6 | set(SOURCE_FILES 7 | stdpy.cpp 8 | std.h 9 | std.cpp 10 | ) 11 | 12 | include_directories(${PYTHON_INCLUDE_DIRS}) 13 | 14 | add_executable(stdpy ${SOURCE_FILES}) 15 | -------------------------------------------------------------------------------- /pybind/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, Extension 2 | 3 | import pybind11 4 | 5 | ext = Extension( 6 | 'stdpy', 7 | sources=[ 8 | 'stdpy.cpp', 9 | 'std.cpp' 10 | ], 11 | include_dirs=[ 12 | pybind11.get_include(), 13 | pybind11.get_include(True), 14 | ], 15 | language='c++', 16 | extra_compile_args=['-std=c++11', '-stdlib=libc++'], 17 | ) 18 | 19 | setup(name='std_performance_py', 20 | version='1.0', 21 | description='Module for calculating standard deviation.', 22 | install_requires=['pybind11>=2.2'], 23 | ext_modules=[ext]) 24 | -------------------------------------------------------------------------------- /pybind/std.cpp: -------------------------------------------------------------------------------- 1 | #include "std.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | double standardDeviation(std::vector v) { 9 | double sum = std::accumulate(v.begin(), v.end(), 0.0); 10 | double mean = sum / v.size(); 11 | 12 | double squareSum = std::inner_product(v.begin(), v.end(), v.begin(), 0.0); 13 | return sqrt(squareSum / v.size() - mean * mean); 14 | } 15 | 
-------------------------------------------------------------------------------- /pybind/std.h: -------------------------------------------------------------------------------- 1 | #ifndef INIT_STD_H 2 | #define INIT_STD_H 3 | 4 | #include 5 | 6 | double standardDeviation(std::vector); 7 | 8 | #endif 9 | -------------------------------------------------------------------------------- /pybind/stdpy.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include // Support for auto-conversion. 3 | 4 | #include "std.h" 5 | 6 | // If you don't include the std.cpp file in the build, 7 | // an error occurs: `expected in: flat namespace`. 8 | PYBIND11_MODULE(stdpy, m) { 9 | m.def("standard_dev", &standardDeviation); 10 | } 11 | -------------------------------------------------------------------------------- /test-perf.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | import timeit 4 | import math 5 | import pandas as pd 6 | from matplotlib import pyplot as plt 7 | import seaborn as sns 8 | import std 9 | import stdcy 10 | import stdcyc 11 | import stdpy 12 | 13 | 14 | def mean(lst): 15 | return sum(lst) / len(lst) 16 | 17 | 18 | def standard_deviation(lst): 19 | m = mean(lst) 20 | variance = sum([(value - m) ** 2 for value in lst]) 21 | return math.sqrt(variance / len(lst)) 22 | 23 | 24 | if __name__ == '__main__': 25 | # start = 1; end = 1000; step = 10; include_pure_py = True 26 | start = 1; end = 100000; step = 2000; include_pure_py = False 27 | lens = range(start, end + 1, step) 28 | 29 | py_time = [] 30 | np_time = [] 31 | c_time = [] 32 | cy_time = [] 33 | cyc_time = [] 34 | pybind_time = [] 35 | 36 | for l in lens: 37 | rands = [random.random() for _ in range(0, l)] 38 | 39 | numpy_rands = np.array(rands) # If you don't do this, It will convert every time 40 | # and there will be a some performance issues. 
41 | cyc_rands = stdcyc.pystd(rands) 42 | 43 | np_time = np.append(np_time, timeit.timeit(lambda: np.std(numpy_rands), number=100)) 44 | c_time = np.append(c_time, timeit.timeit(lambda: std.standard_dev(rands), number=100)) 45 | cy_time = np.append(cy_time, timeit.timeit(lambda: stdcy.standard_dev(rands), number=100)) 46 | cyc_time = np.append(cyc_time, timeit.timeit(lambda: cyc_rands.standard_dev(), number=100)) 47 | pybind_time = np.append(pybind_time, timeit.timeit(lambda: stdpy.standard_dev(rands), number=100)) 48 | 49 | if include_pure_py: 50 | py_time = np.append(py_time, timeit.timeit(lambda: standard_deviation(rands), number=100)) 51 | 52 | data = np.array([np.transpose(np_time), np.transpose(c_time), np.transpose(cy_time), 53 | np.transpose(cyc_time), np.transpose(pybind_time), np.transpose(py_time)]) 54 | else: 55 | data = np.array([np.transpose(np_time), np.transpose(c_time), np.transpose(cy_time), 56 | np.transpose(cyc_time), np.transpose(pybind_time)]) 57 | 58 | if include_pure_py: 59 | df = pd.DataFrame(data.transpose(), index=lens, columns=['Numpy', 'C++', 'Cython', 'Cython w/ class', 60 | 'pybind11', 'Python']) 61 | else: 62 | df = pd.DataFrame(data.transpose(), index=lens, columns=['Numpy', 'C++', 'Cython', 'Cython w/ class', 63 | 'pybind11']) 64 | 65 | plt.figure() 66 | df.plot() 67 | plt.legend(loc='best') 68 | plt.ylabel('Time (Seconds)') 69 | plt.xlabel('Number of Elements') 70 | plt.title('100 Runs of Standard Deviation') 71 | plt.show() 72 | -------------------------------------------------------------------------------- /test-thread.py: -------------------------------------------------------------------------------- 1 | import stdcyc 2 | import stdcyt 3 | import time 4 | import timeit 5 | 6 | import random 7 | 8 | def elapsed(t): 9 | return str(round(t, 3)) + " seconds." 
10 | 11 | random.seed(100) 12 | 13 | item_size = 200000000 14 | 15 | print('{:,} elements'.format(item_size)) 16 | print('--') 17 | 18 | m1_1 = time.time() 19 | rands = [random.random() for _ in range(0, item_size)] 20 | m1_2 = time.time() 21 | print('generate rands: ', elapsed(m1_2 - m1_1)) 22 | 23 | # m3_1 = time.time() 24 | # cyc_rands = stdcyc.pystd(rands) 25 | # m3_2 = time.time() 26 | m2_1 = time.time() 27 | cyt_rands = stdcyt.pystd(rands) 28 | m2_2 = time.time() 29 | 30 | # print('type conversion(stdcyc): ', elapsed(m3_2 - m3_1)) 31 | print('type conversion(stdcyt): ', elapsed(m2_2 - m2_1)) 32 | 33 | print() 34 | 35 | # print('stdcyc elapsed: ', elapsed(timeit.timeit(lambda: cyc_rands.standard_dev(), number=1))) 36 | print('stdcyt elapsed: ', elapsed(timeit.timeit(lambda: cyt_rands.standard_dev(), number=1))) 37 | -------------------------------------------------------------------------------- /test-val.py: -------------------------------------------------------------------------------- 1 | import std 2 | import stdcy 3 | import stdcyc 4 | import stdcyt 5 | import stdpy 6 | import numpy as np 7 | 8 | import random 9 | 10 | random.seed(100) 11 | rands = [random.random() for _ in range(0, 5)] 12 | cyc_rands = stdcyc.pystd(rands) 13 | cyt_rands = stdcyt.pystd(rands) 14 | 15 | print('std: ', std.standard_dev(rands)) 16 | print('stdcy: ', stdcy.standard_dev(rands)) 17 | print('stdcyc: ', cyc_rands.standard_dev()) 18 | print('stdcyt: ', cyt_rands.standard_dev()) 19 | print('stdpy: ', stdpy.standard_dev(rands)) 20 | print('numpy: ', np.std(rands)) 21 | --------------------------------------------------------------------------------